1 /* $Id: resolve_base.cpp 629837 2021-04-22 12:47:49Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Sergey Satskiy
27 *
28 * File Description: base class for processors which need to resolve seq_id
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33
34
35 #include <corelib/request_status.hpp>
36 #include <corelib/ncbidiag.hpp>
37
38 #include "pubseq_gateway.hpp"
39 #include "pubseq_gateway_utils.hpp"
40 #include "pubseq_gateway_cache_utils.hpp"
41 #include "cass_fetch.hpp"
42 #include "psgs_request.hpp"
43 #include "psgs_reply.hpp"
44 #include "insdc_utils.hpp"
45 #include "resolve_base.hpp"
46
47 #include <objects/seqloc/Seq_id.hpp>
48 #include <objects/general/Dbtag.hpp>
49 #include <objects/general/Object_id.hpp>
50 USING_IDBLOB_SCOPE;
51 USING_SCOPE(objects);
52
53 using namespace std::placeholders;
54
55
CPSGS_ResolveBase()56 CPSGS_ResolveBase::CPSGS_ResolveBase()
57 {}
58
59
CPSGS_ResolveBase(shared_ptr<CPSGS_Request> request,shared_ptr<CPSGS_Reply> reply,TSeqIdResolutionFinishedCB finished_cb,TSeqIdResolutionErrorCB error_cb,TSeqIdResolutionStartProcessingCB resolution_start_processing_cb)60 CPSGS_ResolveBase::CPSGS_ResolveBase(shared_ptr<CPSGS_Request> request,
61 shared_ptr<CPSGS_Reply> reply,
62 TSeqIdResolutionFinishedCB finished_cb,
63 TSeqIdResolutionErrorCB error_cb,
64 TSeqIdResolutionStartProcessingCB resolution_start_processing_cb) :
65 CPSGS_AsyncResolveBase(request, reply,
66 bind(&CPSGS_ResolveBase::x_OnSeqIdResolveFinished,
67 this, _1),
68 bind(&CPSGS_ResolveBase::x_OnSeqIdResolveError,
69 this, _1, _2, _3, _4),
70 bind(&CPSGS_ResolveBase::x_OnResolutionGoodData,
71 this)),
72 CPSGS_AsyncBioseqInfoBase(request, reply,
73 bind(&CPSGS_ResolveBase::x_OnSeqIdResolveFinished,
74 this, _1),
75 bind(&CPSGS_ResolveBase::x_OnSeqIdResolveError,
76 this, _1, _2, _3, _4)),
77 m_FinalFinishedCB(finished_cb),
78 m_FinalErrorCB(error_cb),
79 m_FinalStartProcessingCB(resolution_start_processing_cb),
80 m_AsyncStarted(false)
81 {}
82
83
~CPSGS_ResolveBase()84 CPSGS_ResolveBase::~CPSGS_ResolveBase()
85 {}
86
87
88 SPSGS_RequestBase::EPSGS_CacheAndDbUse
x_GetRequestUseCache(void)89 CPSGS_ResolveBase::x_GetRequestUseCache(void)
90 {
91 switch (m_Request->GetRequestType()) {
92 case CPSGS_Request::ePSGS_ResolveRequest:
93 return m_Request->GetRequest<SPSGS_ResolveRequest>().m_UseCache;
94 case CPSGS_Request::ePSGS_BlobBySeqIdRequest:
95 return m_Request->GetRequest<SPSGS_BlobBySeqIdRequest>().m_UseCache;
96 case CPSGS_Request::ePSGS_AnnotationRequest:
97 return m_Request->GetRequest<SPSGS_AnnotRequest>().m_UseCache;
98 default:
99 break;
100 }
101 NCBI_THROW(CPubseqGatewayException, eLogic,
102 "Not handled request type " +
103 to_string(static_cast<int>(m_Request->GetRequestType())));
104 }
105
106
107 bool
x_GetEffectiveSeqIdType(const CSeq_id & parsed_seq_id,int16_t & eff_seq_id_type,bool need_trace)108 CPSGS_ResolveBase::x_GetEffectiveSeqIdType(
109 const CSeq_id & parsed_seq_id,
110 int16_t & eff_seq_id_type,
111 bool need_trace)
112 {
113 auto parsed_seq_id_type = parsed_seq_id.Which();
114 bool parsed_seq_id_type_found = (parsed_seq_id_type !=
115 CSeq_id_Base::e_not_set);
116 auto request_seq_id_type = GetRequestSeqIdType();
117
118 if (!parsed_seq_id_type_found && request_seq_id_type < 0) {
119 eff_seq_id_type = -1;
120 return true;
121 }
122
123 if (!parsed_seq_id_type_found) {
124 eff_seq_id_type = request_seq_id_type;
125 return true;
126 }
127
128 if (request_seq_id_type < 0) {
129 eff_seq_id_type = parsed_seq_id_type;
130 return true;
131 }
132
133 // Both found
134 if (parsed_seq_id_type == request_seq_id_type) {
135 eff_seq_id_type = request_seq_id_type;
136 return true;
137 }
138
139 // The parsed and url explicit seq_id_type do not match
140 if (IsINSDCSeqIdType(parsed_seq_id_type) &&
141 IsINSDCSeqIdType(request_seq_id_type)) {
142 if (need_trace) {
143 m_Reply->SendTrace(
144 "Seq id type mismatch. Parsed CSeq_id reports seq_id_type as " +
145 to_string(parsed_seq_id_type) + " while the URL reports " +
146 to_string(request_seq_id_type) + ". They both belong to INSDC types so "
147 "CSeq_id provided type " + to_string(parsed_seq_id_type) +
148 " is taken as an effective one",
149 m_Request->GetStartTimestamp());
150 }
151 eff_seq_id_type = parsed_seq_id_type;
152 return true;
153 }
154
155 return false;
156 }
157
158
159 EPSGS_SeqIdParsingResult
x_ParseInputSeqId(CSeq_id & seq_id,string & err_msg)160 CPSGS_ResolveBase::x_ParseInputSeqId(CSeq_id & seq_id,
161 string & err_msg)
162 {
163 bool need_trace = m_Request->NeedTrace();
164 auto request_seq_id = GetRequestSeqId();
165 auto request_seq_id_type = GetRequestSeqIdType();
166
167 try {
168 seq_id.Set(request_seq_id);
169 if (need_trace)
170 m_Reply->SendTrace("Parsing CSeq_id('" + request_seq_id +
171 "') succeeded", m_Request->GetStartTimestamp());
172
173 if (request_seq_id_type <= 0) {
174 if (need_trace)
175 m_Reply->SendTrace("Parsing CSeq_id finished OK (#1)",
176 m_Request->GetStartTimestamp());
177 return ePSGS_ParsedOK;
178 }
179
180 // Check the parsed type with the given
181 int16_t eff_seq_id_type;
182 if (x_GetEffectiveSeqIdType(seq_id, eff_seq_id_type, false)) {
183 if (need_trace)
184 m_Reply->SendTrace("Parsing CSeq_id finished OK (#2)",
185 m_Request->GetStartTimestamp());
186 return ePSGS_ParsedOK;
187 }
188
189 // seq_id_type from URL and from CSeq_id differ
190 CSeq_id_Base::E_Choice seq_id_type = seq_id.Which();
191
192 if (need_trace)
193 m_Reply->SendTrace("CSeq_id provided type " + to_string(seq_id_type) +
194 " and URL provided seq_id_type " +
195 to_string(request_seq_id_type) + " mismatch",
196 m_Request->GetStartTimestamp());
197
198 if (IsINSDCSeqIdType(request_seq_id_type) &&
199 IsINSDCSeqIdType(seq_id_type)) {
200 // Both seq_id_types belong to INSDC
201 if (need_trace) {
202 m_Reply->SendTrace("Both types belong to INSDC types.\n"
203 "Parsing CSeq_id finished OK (#3)",
204 m_Request->GetStartTimestamp());
205 }
206 return ePSGS_ParsedOK;
207 }
208
209 // Type mismatch: form the error message in case of resolution problems
210 err_msg = "Seq_id '" + request_seq_id +
211 "' possible type mismatch: the URL provides " +
212 to_string(request_seq_id_type) +
213 " while the CSeq_Id detects it as " +
214 to_string(static_cast<int>(seq_id_type));
215 } catch (...) {
216 if (need_trace)
217 m_Reply->SendTrace("Parsing CSeq_id('" + request_seq_id +
218 "') failed (exception)",
219 m_Request->GetStartTimestamp());
220 }
221
222 // Second variation of Set()
223 if (request_seq_id_type > 0) {
224 try {
225 seq_id.Set(CSeq_id::eFasta_AsTypeAndContent,
226 (CSeq_id_Base::E_Choice)(request_seq_id_type),
227 request_seq_id);
228 if (need_trace) {
229 m_Reply->SendTrace("Parsing CSeq_id(eFasta_AsTypeAndContent, " +
230 to_string(request_seq_id_type) +
231 ", '" + request_seq_id + "') succeeded.\n"
232 "Parsing CSeq_id finished OK (#4)",
233 m_Request->GetStartTimestamp());
234 }
235 return ePSGS_ParsedOK;
236 } catch (...) {
237 if (need_trace)
238 m_Reply->SendTrace("Parsing CSeq_id(eFasta_AsTypeAndContent, " +
239 to_string(request_seq_id_type) +
240 ", '" + request_seq_id + "') failed (exception)",
241 m_Request->GetStartTimestamp());
242 }
243 }
244
245 if (need_trace) {
246 m_Reply->SendTrace("Parsing CSeq_id finished FAILED",
247 m_Request->GetStartTimestamp());
248 }
249
250 return ePSGS_ParseFailed;
251 }
252
253
254 bool
x_ComposeOSLT(CSeq_id & parsed_seq_id,int16_t & effective_seq_id_type,list<string> & secondary_id_list,string & primary_id)255 CPSGS_ResolveBase::x_ComposeOSLT(CSeq_id & parsed_seq_id,
256 int16_t & effective_seq_id_type,
257 list<string> & secondary_id_list,
258 string & primary_id)
259 {
260 bool need_trace = m_Request->NeedTrace();
261
262 if (!x_GetEffectiveSeqIdType(parsed_seq_id,
263 effective_seq_id_type, need_trace)) {
264 if (need_trace) {
265 m_Reply->SendTrace("OSLT has not been tried due to mismatch "
266 "between the parsed CSeq_id seq_id_type and "
267 "the URL provided one",
268 m_Request->GetStartTimestamp());
269 }
270 return false;
271 }
272
273 try {
274 primary_id = parsed_seq_id.ComposeOSLT(&secondary_id_list,
275 CSeq_id::fGpipeAddSecondary);
276 } catch (...) {
277 if (need_trace) {
278 m_Reply->SendTrace("OSLT call failure (exception)",
279 m_Request->GetStartTimestamp());
280 }
281 return false;
282 }
283
284 if (need_trace) {
285 string trace_msg("OSLT succeeded");
286 trace_msg += "\nOSLT primary id: " + primary_id;
287
288 if (secondary_id_list.empty()) {
289 trace_msg += "\nOSLT secondary id list is empty";
290 } else {
291 for (const auto & item : secondary_id_list) {
292 trace_msg += "\nOSLT secondary id: " + item;
293 }
294 }
295 m_Reply->SendTrace(trace_msg, m_Request->GetStartTimestamp());
296 }
297
298 return true;
299 }
300
301
302 EPSGS_CacheLookupResult
x_ResolvePrimaryOSLTInCache(const string & primary_id,int16_t effective_version,int16_t effective_seq_id_type,SBioseqResolution & bioseq_resolution)303 CPSGS_ResolveBase::x_ResolvePrimaryOSLTInCache(
304 const string & primary_id,
305 int16_t effective_version,
306 int16_t effective_seq_id_type,
307 SBioseqResolution & bioseq_resolution)
308 {
309 EPSGS_CacheLookupResult bioseq_cache_lookup_result = ePSGS_CacheNotHit;
310
311 if (!primary_id.empty()) {
312 CPSGCache psg_cache(true, m_Request, m_Reply);
313
314 // Try BIOSEQ_INFO
315 bioseq_resolution.m_BioseqInfo.SetAccession(primary_id);
316 bioseq_resolution.m_BioseqInfo.SetVersion(effective_version);
317 bioseq_resolution.m_BioseqInfo.SetSeqIdType(effective_seq_id_type);
318
319 bioseq_cache_lookup_result = psg_cache.LookupBioseqInfo(
320 bioseq_resolution);
321 if (bioseq_cache_lookup_result == ePSGS_CacheHit) {
322 bioseq_resolution.m_ResolutionResult = ePSGS_BioseqCache;
323 return ePSGS_CacheHit;
324 }
325
326 bioseq_resolution.Reset();
327 }
328 return bioseq_cache_lookup_result;
329 }
330
331
332 EPSGS_CacheLookupResult
x_ResolveSecondaryOSLTInCache(const string & secondary_id,int16_t effective_seq_id_type,SBioseqResolution & bioseq_resolution)333 CPSGS_ResolveBase::x_ResolveSecondaryOSLTInCache(
334 const string & secondary_id,
335 int16_t effective_seq_id_type,
336 SBioseqResolution & bioseq_resolution)
337 {
338 bioseq_resolution.m_BioseqInfo.SetAccession(secondary_id);
339 bioseq_resolution.m_BioseqInfo.SetSeqIdType(effective_seq_id_type);
340
341 CPSGCache psg_cache(true, m_Request, m_Reply);
342 auto si2csi_cache_lookup_result =
343 psg_cache.LookupSi2csi(bioseq_resolution);
344 if (si2csi_cache_lookup_result == ePSGS_CacheHit) {
345 bioseq_resolution.m_ResolutionResult = ePSGS_Si2csiCache;
346 return ePSGS_CacheHit;
347 }
348
349 bioseq_resolution.Reset();
350
351 if (si2csi_cache_lookup_result == ePSGS_CacheFailure)
352 return ePSGS_CacheFailure;
353 return ePSGS_CacheNotHit;
354 }
355
356
357 EPSGS_CacheLookupResult
x_ResolveAsIsInCache(SBioseqResolution & bioseq_resolution,bool need_as_is)358 CPSGS_ResolveBase::x_ResolveAsIsInCache(
359 SBioseqResolution & bioseq_resolution,
360 bool need_as_is)
361 {
362 EPSGS_CacheLookupResult cache_lookup_result = ePSGS_CacheNotHit;
363
364 // Capitalize seq_id
365 string upper_seq_id = GetRequestSeqId();
366 NStr::ToUpper(upper_seq_id);
367
368 auto seq_id_type = GetRequestSeqIdType();
369
370 // 1. As is
371 if (need_as_is == true) {
372 cache_lookup_result = x_ResolveSecondaryOSLTInCache(
373 upper_seq_id, seq_id_type,
374 bioseq_resolution);
375 }
376
377 if (cache_lookup_result == ePSGS_CacheNotHit) {
378 // 2. if there are | at the end => strip all trailing bars
379 // else => add one | at the end
380 if (upper_seq_id[upper_seq_id.size() - 1] == '|') {
381 string strip_bar_seq_id(upper_seq_id);
382 while (strip_bar_seq_id[strip_bar_seq_id.size() - 1] == '|')
383 strip_bar_seq_id.erase(strip_bar_seq_id.size() - 1, 1);
384 cache_lookup_result = x_ResolveSecondaryOSLTInCache(
385 strip_bar_seq_id, seq_id_type,
386 bioseq_resolution);
387 } else {
388 string seq_id_added_bar(upper_seq_id);
389 seq_id_added_bar.append(1, '|');
390 cache_lookup_result = x_ResolveSecondaryOSLTInCache(
391 seq_id_added_bar, seq_id_type,
392 bioseq_resolution);
393 }
394 }
395
396 if (cache_lookup_result == ePSGS_CacheFailure) {
397 bioseq_resolution.Reset();
398 bioseq_resolution.m_Error.m_ErrorMessage = "Cache lookup failure";
399 bioseq_resolution.m_Error.m_ErrorCode = CRequestStatus::e500_InternalServerError;
400 }
401
402 return cache_lookup_result;
403 }
404
405
406 void
x_ResolveViaComposeOSLTInCache(CSeq_id & parsed_seq_id,int16_t effective_seq_id_type,const list<string> & secondary_id_list,const string & primary_id,SBioseqResolution & bioseq_resolution)407 CPSGS_ResolveBase::x_ResolveViaComposeOSLTInCache(
408 CSeq_id & parsed_seq_id,
409 int16_t effective_seq_id_type,
410 const list<string> & secondary_id_list,
411 const string & primary_id,
412 SBioseqResolution & bioseq_resolution)
413 {
414 const CTextseq_id * text_seq_id = parsed_seq_id.GetTextseq_Id();
415 int16_t effective_version = GetEffectiveVersion(text_seq_id);
416 bool cache_failure = false;
417
418 if (!primary_id.empty()) {
419 auto cache_lookup_result =
420 x_ResolvePrimaryOSLTInCache(primary_id, effective_version,
421 effective_seq_id_type,
422 bioseq_resolution);
423 if (cache_lookup_result == ePSGS_CacheHit)
424 return;
425 if (cache_lookup_result == ePSGS_CacheFailure)
426 cache_failure = true;
427 }
428
429 for (const auto & secondary_id : secondary_id_list) {
430 auto cache_lookup_result =
431 x_ResolveSecondaryOSLTInCache(secondary_id,
432 effective_seq_id_type,
433 bioseq_resolution);
434 if (cache_lookup_result == ePSGS_CacheHit)
435 return;
436 if (cache_lookup_result == ePSGS_CacheFailure) {
437 cache_failure = true;
438 break;
439 }
440 }
441
442 // Try cache as it came from URL
443 // The primary id may match the URL given seq_id so it makes sense to
444 // exclude trying the very same string in x_ResolveAsIsInCache(). The
445 // x_ResolveAsIsInCache() capitalizes the url seq id so the capitalized
446 // versions need to be compared
447 string upper_seq_id = GetRequestSeqId();
448 NStr::ToUpper(upper_seq_id);
449 bool need_as_is = primary_id != upper_seq_id;
450 auto cache_lookup_result =
451 x_ResolveAsIsInCache(bioseq_resolution, need_as_is);
452 if (cache_lookup_result == ePSGS_CacheHit)
453 return;
454 if (cache_lookup_result == ePSGS_CacheFailure)
455 cache_failure = true;
456
457 bioseq_resolution.Reset();
458
459 if (cache_failure) {
460 bioseq_resolution.m_Error.m_ErrorMessage = "Cache lookup failure";
461 bioseq_resolution.m_Error.m_ErrorCode = CRequestStatus::e500_InternalServerError;
462 }
463 }
464
465
466 void
ResolveInputSeqId(void)467 CPSGS_ResolveBase::ResolveInputSeqId(void)
468 {
469 SBioseqResolution bioseq_resolution;
470 auto app = CPubseqGatewayApp::GetInstance();
471 string parse_err_msg;
472 CSeq_id oslt_seq_id;
473 auto parsing_result = x_ParseInputSeqId(oslt_seq_id,
474 parse_err_msg);
475
476 // The results of the ComposeOSLT are used in both cache and DB
477 int16_t effective_seq_id_type;
478 list<string> secondary_id_list;
479 string primary_id;
480 bool composed_ok = false;
481 if (parsing_result == ePSGS_ParsedOK) {
482 composed_ok = x_ComposeOSLT(oslt_seq_id, effective_seq_id_type,
483 secondary_id_list, primary_id);
484 }
485
486 auto request_use_cache = x_GetRequestUseCache();
487 if (request_use_cache != SPSGS_RequestBase::ePSGS_DbOnly) {
488 // Try cache
489 if (composed_ok)
490 x_ResolveViaComposeOSLTInCache(oslt_seq_id, effective_seq_id_type,
491 secondary_id_list, primary_id,
492 bioseq_resolution);
493 else
494 x_ResolveAsIsInCache(bioseq_resolution);
495
496 if (bioseq_resolution.IsValid()) {
497 // Special case for the seq_id like gi|156232
498 bool continue_with_cassandra = false;
499 if (bioseq_resolution.m_ResolutionResult == ePSGS_Si2csiCache) {
500 if (!CanSkipBioseqInfoRetrieval(
501 bioseq_resolution.m_BioseqInfo)) {
502 // This is an optimization. Try to find the record in the
503 // BIOSEQ_INFO only if needed.
504 CPSGCache psg_cache(true, m_Request, m_Reply);
505 auto bioseq_cache_lookup_result =
506 psg_cache.LookupBioseqInfo(bioseq_resolution);
507
508 if (bioseq_cache_lookup_result != ePSGS_CacheHit) {
509 // Not found or error
510 continue_with_cassandra = true;
511 bioseq_resolution.Reset();
512 } else {
513 bioseq_resolution.m_ResolutionResult = ePSGS_BioseqCache;
514
515 auto adj_result = AdjustBioseqAccession(
516 bioseq_resolution);
517 if (adj_result == ePSGS_LogicError ||
518 adj_result == ePSGS_SeqIdsEmpty) {
519 continue_with_cassandra = true;
520 bioseq_resolution.Reset();
521 }
522 }
523 }
524 } else {
525 // The result is coming from the BIOSEQ_INFO cache. Need to try
526 // the adjustment
527 auto adj_result = AdjustBioseqAccession(bioseq_resolution);
528 if (adj_result == ePSGS_LogicError ||
529 adj_result == ePSGS_SeqIdsEmpty) {
530 continue_with_cassandra = true;
531 bioseq_resolution.Reset();
532 }
533 }
534
535 if (!continue_with_cassandra) {
536 x_OnSeqIdResolveFinished(move(bioseq_resolution));
537 return;
538 }
539 }
540 }
541
542 if (request_use_cache != SPSGS_RequestBase::ePSGS_CacheOnly) {
543 // Need to initiate async DB resolution
544
545 // Memorize an error if there was one
546 if (! parse_err_msg.empty() &&
547 ! bioseq_resolution.m_Error.HasError()) {
548 bioseq_resolution.m_Error.m_ErrorMessage = parse_err_msg;
549 bioseq_resolution.m_Error.m_ErrorCode = CRequestStatus::e404_NotFound;
550 }
551
552 // Async request
553 m_AsyncStarted = true;
554 CPSGS_AsyncResolveBase::Process(
555 GetEffectiveVersion(oslt_seq_id.GetTextseq_Id()),
556 effective_seq_id_type,
557 move(secondary_id_list),
558 move(primary_id),
559 composed_ok,
560 move(bioseq_resolution));
561
562 // Async resolver will call a callback
563 return;
564 }
565
566 // Finished with resolution:
567 // - not found
568 // - parsing error
569 // - LMDB error
570 app->GetCounters().Increment(CPSGSCounters::ePSGS_InputSeqIdNotResolved);
571
572 if (bioseq_resolution.m_Error.HasError()) {
573 x_OnSeqIdResolveError(bioseq_resolution.m_Error.m_ErrorCode,
574 ePSGS_UnresolvedSeqId,
575 eDiag_Error,
576 bioseq_resolution.m_Error.m_ErrorMessage);
577 return;
578 }
579
580 if (!parse_err_msg.empty()) {
581 x_OnSeqIdResolveError(CRequestStatus::e404_NotFound,
582 ePSGS_UnresolvedSeqId, eDiag_Error,
583 parse_err_msg);
584 return;
585 }
586
587 x_OnSeqIdResolveError(CRequestStatus::e404_NotFound, ePSGS_UnresolvedSeqId,
588 eDiag_Error,
589 "Could not resolve seq_id " + GetRequestSeqId());
590 }
591
592
593 SBioseqResolution
ResolveTestInputSeqId(void)594 CPSGS_ResolveBase::ResolveTestInputSeqId(void)
595 {
596 // The method is to support the 'health' and 'deep-health' URLs.
597 // The only cache needs to be tried and no writing to the reply is allowed
598 SBioseqResolution bioseq_resolution;
599 string parse_err_msg;
600 CSeq_id oslt_seq_id;
601 auto parsing_result = x_ParseInputSeqId(oslt_seq_id,
602 parse_err_msg);
603
604 // The results of the ComposeOSLT are used in both cache and DB
605 int16_t effective_seq_id_type;
606 list<string> secondary_id_list;
607 string primary_id;
608 bool composed_ok = false;
609 if (parsing_result == ePSGS_ParsedOK) {
610 composed_ok = x_ComposeOSLT(oslt_seq_id, effective_seq_id_type,
611 secondary_id_list, primary_id);
612 }
613
614 // Try cache unconditionally
615 if (composed_ok)
616 x_ResolveViaComposeOSLTInCache(oslt_seq_id, effective_seq_id_type,
617 secondary_id_list, primary_id,
618 bioseq_resolution);
619 else
620 x_ResolveAsIsInCache(bioseq_resolution);
621
622
623 if (!bioseq_resolution.IsValid()) {
624 if (!bioseq_resolution.m_Error.HasError()) {
625 if (!parse_err_msg.empty()) {
626 bioseq_resolution.m_Error.m_ErrorMessage = parse_err_msg;
627 }
628 }
629 }
630
631 return bioseq_resolution;
632 }
633
634
635 void
x_OnSeqIdResolveError(CRequestStatus::ECode status,int code,EDiagSev severity,const string & message)636 CPSGS_ResolveBase::x_OnSeqIdResolveError(
637 CRequestStatus::ECode status,
638 int code,
639 EDiagSev severity,
640 const string & message)
641 {
642 auto app = CPubseqGatewayApp::GetInstance();
643 if (status == CRequestStatus::e404_NotFound) {
644 app->GetTiming().Register(eResolutionNotFound, eOpStatusNotFound,
645 m_Request->GetStartTimestamp());
646 if (m_AsyncStarted)
647 app->GetTiming().Register(eResolutionCass, eOpStatusNotFound,
648 GetAsyncResolutionStartTimestamp());
649 else
650 app->GetTiming().Register(eResolutionLmdb, eOpStatusNotFound,
651 m_Request->GetStartTimestamp());
652 }
653 else {
654 app->GetTiming().Register(eResolutionError, eOpStatusNotFound,
655 m_Request->GetStartTimestamp());
656 }
657
658 m_FinalErrorCB(status, code, severity, message);
659 }
660
661
662 // Called only in case of a success
x_OnSeqIdResolveFinished(SBioseqResolution && bioseq_resolution)663 void CPSGS_ResolveBase::x_OnSeqIdResolveFinished(
664 SBioseqResolution && bioseq_resolution)
665 {
666 // A few cases here: comes from cache or DB
667 // ePSGS_Si2csiCache, ePSGS_Si2csiDB, ePSGS_BioseqCache, ePSGS_BioseqDB
668 if (bioseq_resolution.m_ResolutionResult == ePSGS_Si2csiDB ||
669 bioseq_resolution.m_ResolutionResult == ePSGS_Si2csiCache) {
670 // We have the following fields at hand:
671 // - accession, version, seq_id_type, gi
672 // May be it is what the user asked for
673 if (!CanSkipBioseqInfoRetrieval(bioseq_resolution.m_BioseqInfo)) {
674 // Need to pull the full bioseq info
675 CPSGCache psg_cache(m_Request, m_Reply);
676 auto cache_lookup_result =
677 psg_cache.LookupBioseqInfo(bioseq_resolution);
678 if (cache_lookup_result != ePSGS_CacheHit) {
679 // No cache hit (or not allowed); need to get to DB if allowed
680 if (x_GetRequestUseCache() != SPSGS_RequestBase::ePSGS_CacheOnly) {
681 // Async DB query
682
683 // To have the proper timing registered in the errors
684 // handler
685 m_AsyncStarted = true;
686
687 if (bioseq_resolution.m_CassQueryCount == 0) {
688 // It now became cassandra based so need to memorize
689 // the start timestamp
690 SetAsyncResolutionStartTimestamp(
691 chrono::high_resolution_clock::now());
692 }
693
694 CPSGS_AsyncBioseqInfoBase::MakeRequest(
695 move(bioseq_resolution));
696 return;
697 }
698
699 // It is a bioseq inconsistency case
700 x_OnSeqIdResolveError(
701 CRequestStatus::e500_InternalServerError,
702 ePSGS_NoBioseqInfo, eDiag_Error,
703 "Data inconsistency: the bioseq key info was "
704 "resolved for seq_id " + GetRequestSeqId() +
705 " but the bioseq info is not found");
706 return;
707 } else {
708 bioseq_resolution.m_ResolutionResult = ePSGS_BioseqCache;
709 }
710 }
711 }
712
713 // All good
714 x_OnResolutionGoodData();
715 x_RegisterSuccessTiming(bioseq_resolution);
716 m_FinalFinishedCB(move(bioseq_resolution));
717 }
718
719
720 void
x_RegisterSuccessTiming(const SBioseqResolution & bioseq_resolution)721 CPSGS_ResolveBase::x_RegisterSuccessTiming(
722 const SBioseqResolution & bioseq_resolution)
723 {
724 auto app = CPubseqGatewayApp::GetInstance();
725
726 // Overall timing, regardless how it was done
727 app->GetTiming().Register(eResolutionFound, eOpStatusFound,
728 m_Request->GetStartTimestamp());
729
730 if (bioseq_resolution.m_CassQueryCount > 0) {
731 // Regardless how many requests
732 app->GetTiming().Register(eResolutionCass, eOpStatusFound,
733 GetAsyncResolutionStartTimestamp());
734
735 // Separated by the number of requests
736 app->GetTiming().Register(eResolutionFoundInCassandra,
737 eOpStatusFound,
738 GetAsyncResolutionStartTimestamp(),
739 bioseq_resolution.m_CassQueryCount);
740 } else {
741 app->GetTiming().Register(eResolutionLmdb, eOpStatusFound,
742 m_Request->GetStartTimestamp());
743 }
744 }
745
746
x_OnResolutionGoodData(void)747 void CPSGS_ResolveBase::x_OnResolutionGoodData(void)
748 {
749 m_FinalStartProcessingCB();
750 }
751
752