1 #ifndef OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP
2 #define OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP
3 
4 /*  $Id: psg_client.hpp 628921 2021-04-07 18:46:41Z ivanov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Denis Vakatov (design), Rafael Sadyrov (implementation)
30  *
31  */
32 
33 #include <corelib/ncbimisc.hpp>
34 #include <corelib/ncbitime.hpp>
35 #include <corelib/ncbi_url.hpp>
36 #include <corelib/request_ctx.hpp>
37 #include <connect/services/json_over_uttp.hpp>
38 #include <objects/seq/Seq_inst.hpp>
39 #include <objects/seqloc/Seq_loc.hpp>
40 #include <objects/seqset/Bioseq_set.hpp>
41 #include <objects/seq/Seq_annot.hpp>
42 #include <objects/seqsplit/ID2S_Seq_annot_Info.hpp>
43 
44 
// HAVE_PSG_CLIENT is defined (as 1) only when NCBI_THREADS, HAVE_LIBNGHTTP2
// and HAVE_LIBUV are all defined.
#if defined(NCBI_THREADS)
#  if defined(HAVE_LIBNGHTTP2) && defined(HAVE_LIBUV)
#    define HAVE_PSG_CLIENT 1
#  endif
#endif
48 
49 
50 #if defined(HAVE_PSG_CLIENT)
51 BEGIN_NCBI_SCOPE
52 
53 
54 
55 class CPSG_Exception : public CException
56 {
57 public:
58     enum EErrCode {
59         eTimeout,
60         eServerError,
61         eInternalError,
62         eParameterMissing
63     };
64 
65     virtual const char* GetErrCodeString(void) const override;
66 
67     NCBI_EXCEPTION_DEFAULT(CPSG_Exception, CException);
68 };
69 
70 
71 
72 /// Request to the PSG server (see "CPSG_Request_*" below)
73 ///
74 
75 class CPSG_Request
76 {
77 public:
78     /// Get the user-provided context
79     template<typename TUserContext>
GetUserContext() const80     shared_ptr<TUserContext> GetUserContext() const
81     { return static_pointer_cast<TUserContext>(m_UserContext); }
82 
83     /// Get request context
GetRequestContext() const84     CRef<CRequestContext> GetRequestContext() const { return m_RequestContext; }
85 
86     /// Get request type
GetType() const87     string GetType() const { return x_GetType(); }
88 
89     // Get request ID
GetId() const90     string GetId() const { return x_GetId(); }
91 
92     /// Set hops
SetHops(unsigned hops)93     void SetHops(unsigned hops) { m_Hops = hops; }
94 
95 protected:
CPSG_Request(shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})96     CPSG_Request(shared_ptr<void> user_context = {},
97                  CRef<CRequestContext> request_context = {})
98         : m_UserContext(user_context),
99           m_RequestContext(request_context)
100     {}
101 
102     virtual ~CPSG_Request() = default;
103 
104 private:
105     virtual string x_GetType() const = 0;
106     virtual string x_GetId() const = 0;
107     virtual void x_GetAbsPathRef(ostream&) const = 0;
108 
109     shared_ptr<void> m_UserContext;
110     CRef<CRequestContext> m_RequestContext;
111     unsigned m_Hops = 0;
112 
113     friend class CPSG_Queue;
114 };
115 
116 
117 
118 /// Bio-id (such as accession)
119 ///
120 class CPSG_BioId
121 {
122 public:
123     using TType = objects::CSeq_id::E_Choice;
124 
125     /// @param id
126     ///  Bio ID (like accession)
CPSG_BioId(string id,TType type={})127     CPSG_BioId(string id, TType type = {}) : m_Id(move(id)), m_Type(type) {}
128 
129     /// Get tilde-separated string representation of this bio ID (e.g. for logging)
130     string Repr() const;
131 
132     /// Get ID
GetId() const133     const string& GetId() const { return m_Id; }
134 
135     /// Get type
GetType() const136     TType GetType() const { return m_Type; }
137 
138 private:
139     string m_Id;
140     TType  m_Type;
141 };
142 
143 
144 
/// Blob data unique ID (abstract interface, see CPSG_BlobId and CPSG_ChunkId)
///
class CPSG_DataId
{
public:
    /// Tilde-separated string representation of this data ID
    /// (e.g. for logging)
    virtual string Repr() const = 0;

    virtual ~CPSG_DataId() = default;
};
155 
156 
157 
158 /// Blob unique ID
159 ///
160 class CPSG_BlobId : public CPSG_DataId
161 {
162 public:
163     using TLastModified = CNullable<Int8>;
164 
165     /// Mainstream blob ID ctor - from a string ID
166     /// @param id
167     ///  Blob ID
CPSG_BlobId(string id,TLastModified last_modified={})168     CPSG_BlobId(string id, TLastModified last_modified = {})
169         : m_Id(move(id)),
170           m_LastModified(move(last_modified))
171     {}
172 
173     /// Historical blob ID system -- based on the "satellite" and the "key"
174     /// inside it. It'll be translated into "<sat>.<sat_key>" string.
175     /// @sa  objects::CID2_Blob_Id::TSat, objects::CID2_Blob_Id::TSat_key
CPSG_BlobId(int sat,int sat_key,TLastModified last_modified={})176     CPSG_BlobId(int sat, int sat_key, TLastModified last_modified = {})
177         : m_Id(to_string(sat) + "." + to_string(sat_key)),
178           m_LastModified(move(last_modified))
179     {}
180 
181     /// Get tilde-separated string representation of this blob ID (e.g. for logging)
182     string Repr() const override;
183 
184     /// Get ID
GetId() const185     const string& GetId() const { return m_Id; }
186 
187     /// Get last modified
GetLastModified() const188     const TLastModified& GetLastModified() const { return m_LastModified; }
189 
190 private:
191     string m_Id;
192     TLastModified m_LastModified;
193 };
194 
195 
196 
197 /// Chunk unique ID
198 ///
199 class CPSG_ChunkId : public CPSG_DataId
200 {
201 public:
CPSG_ChunkId(Uint8 id2_chunk,string id2_info)202     CPSG_ChunkId(Uint8 id2_chunk, string id2_info)
203         : m_Id2Chunk(id2_chunk),
204           m_Id2Info(move(id2_info))
205     {}
206 
207     /// Get tilde-separated string representation of this chunk ID (e.g. for logging)
208     string Repr() const override;
209 
210     /// Get ID2 chunk number
GetId2Chunk() const211     Uint8 GetId2Chunk() const { return m_Id2Chunk; }
212 
213     /// Get ID2 info
GetId2Info() const214     const string& GetId2Info() const { return m_Id2Info; }
215 
216 private:
217     Uint8 m_Id2Chunk;
218     string m_Id2Info;
219 };
220 
221 
222 
/// Policy for replacing version-less primary seq-ids with
/// the "more unique" secondary seq-ids
enum class EPSG_AccSubstitution {
    /// Substitute always (default)
    Default,

    /// Substitute only if the resolved record's seq_id_type is GI(12)
    Limited,

    /// No substitution whatsoever - return exact raw accession info
    Never
};
230 
231 
232 
233 /// Request to the PSG server (by bio-id, for a biodata specific info and data)
234 ///
235 
236 class CPSG_Request_Biodata : public CPSG_Request
237 {
238 public:
239     ///
CPSG_Request_Biodata(CPSG_BioId bio_id,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})240     CPSG_Request_Biodata(CPSG_BioId       bio_id,
241                          shared_ptr<void> user_context = {},
242                          CRef<CRequestContext> request_context = {})
243         : CPSG_Request(user_context, request_context),
244           m_BioId(bio_id)
245     {}
246 
GetBioId() const247     const CPSG_BioId& GetBioId() const { return m_BioId; }
248 
249     /// Specify which info and data is needed
250     enum EIncludeData {
251         /// Server default
252         eDefault,
253 
254         /// Only the info
255         eNoTSE,
256 
257         /// If ID2 split is available, return split info blob only.
258         /// Otherwise, return no data.
259         eSlimTSE,
260 
261         /// If ID2 split is available, return split info blob only.
262         /// Otherwise, return all Cassandra data chunks of the blob itself.
263         eSmartTSE,
264 
265         /// If ID2 split is available, return all split blobs.
266         /// Otherwise, return all Cassandra data chunks of the blob itself.
267         eWholeTSE,
268 
269         /// Return all Cassandra data chunks of the blob itself.
270         eOrigTSE
271     };
IncludeData(EIncludeData include)272     void IncludeData(EIncludeData include) { m_IncludeData = include; }
273 
GetIncludeData() const274     EIncludeData GetIncludeData() const { return m_IncludeData; }
275 
276     using TExcludeTSEs = vector<CPSG_BlobId>;
277 
ExcludeTSE(CPSG_BlobId blob_id)278     void ExcludeTSE(CPSG_BlobId blob_id) { m_ExcludeTSEs.emplace_back(move(blob_id)); }
279 
GetExcludeTSEs() const280     const TExcludeTSEs& GetExcludeTSEs() const { return m_ExcludeTSEs; }
281 
282     /// Set substitution policy for version-less primary seq-ids
SetAccSubstitution(EPSG_AccSubstitution acc_substitution)283     void SetAccSubstitution(EPSG_AccSubstitution acc_substitution) { m_AccSubstitution = acc_substitution; }
284 
285     /// Enable/disable auto blob skipping on server for this request
SetAutoBlobSkipping(bool auto_blob_skipping)286     void SetAutoBlobSkipping(bool auto_blob_skipping) { m_AutoBlobSkipping = auto_blob_skipping ? eOn : eOff; }
287 
288 private:
x_GetType() const289     string x_GetType() const override { return "biodata"; }
x_GetId() const290     string x_GetId() const override { return GetBioId().Repr(); }
291     void x_GetAbsPathRef(ostream&) const override;
292 
293     CPSG_BioId    m_BioId;
294     EIncludeData  m_IncludeData = EIncludeData::eDefault;
295     TExcludeTSEs  m_ExcludeTSEs;
296     EPSG_AccSubstitution m_AccSubstitution = EPSG_AccSubstitution::Default;
297     ESwitch m_AutoBlobSkipping = ESwitch::eDefault;
298 };
299 
300 
301 
302 /// Request to the PSG server (by bio-id, for a biodata specific info and data)
303 ///
304 
305 class CPSG_Request_Resolve : public CPSG_Request
306 {
307 public:
308     ///
CPSG_Request_Resolve(CPSG_BioId bio_id,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})309     CPSG_Request_Resolve(CPSG_BioId       bio_id,
310                          shared_ptr<void> user_context = {},
311                          CRef<CRequestContext> request_context = {})
312         : CPSG_Request(user_context, request_context),
313           m_BioId(bio_id)
314     {}
315 
GetBioId() const316     const CPSG_BioId& GetBioId() const { return m_BioId; }
317 
318     /// Specify which info and data is needed
319     enum EIncludeInfo : unsigned {
320         // These flags correspond exactly to the CPSG_BioseqInfo's getters
321         fCanonicalId      = (1 << 1),
322         fName             = (1 << 2), ///< Requests name to use for canonical bio-id
323         fOtherIds         = (1 << 3),
324         fMoleculeType     = (1 << 4),
325         fLength           = (1 << 5),
326         fChainState       = (1 << 6),
327         fState            = (1 << 7),
328         fBlobId           = (1 << 8),
329         fTaxId            = (1 << 9),
330         fHash             = (1 << 10),
331         fDateChanged      = (1 << 11),
332         fGi               = (1 << 12),
333         fAllInfo          = numeric_limits<unsigned>::max()
334     };
335     DECLARE_SAFE_FLAGS_TYPE(EIncludeInfo, TIncludeInfo);
IncludeInfo(TIncludeInfo include)336     void IncludeInfo(TIncludeInfo include) { m_IncludeInfo = include; }
337 
GetIncludeInfo() const338     TIncludeInfo      GetIncludeInfo() const { return m_IncludeInfo; }
339 
340     /// Set substitution policy for version-less primary seq-ids
SetAccSubstitution(EPSG_AccSubstitution acc_substitution)341     void SetAccSubstitution(EPSG_AccSubstitution acc_substitution) { m_AccSubstitution = acc_substitution; }
342 
343 private:
x_GetType() const344     string x_GetType() const override { return "resolve"; }
x_GetId() const345     string x_GetId() const override { return GetBioId().Repr(); }
346     void x_GetAbsPathRef(ostream&) const override;
347 
348     CPSG_BioId    m_BioId;
349     TIncludeInfo  m_IncludeInfo = TIncludeInfo(0);
350     EPSG_AccSubstitution m_AccSubstitution = EPSG_AccSubstitution::Default;
351 };
352 
353 
354 
355 /// Request to the PSG server (by blob-id, for a particular blob of data)
356 ///
357 
358 class CPSG_Request_Blob : public CPSG_Request
359 {
360 public:
361     ///
CPSG_Request_Blob(CPSG_BlobId blob_id,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})362     CPSG_Request_Blob(CPSG_BlobId           blob_id,
363                       shared_ptr<void>      user_context = {},
364                       CRef<CRequestContext> request_context = {})
365         : CPSG_Request(move(user_context), move(request_context)),
366           m_BlobId(move(blob_id))
367     {}
368 
GetBlobId() const369     const CPSG_BlobId& GetBlobId()       const { return m_BlobId; }
370 
371     /// Specify which data is needed (info is always returned)
372     using EIncludeData = CPSG_Request_Biodata::EIncludeData;
IncludeData(EIncludeData include)373     void IncludeData(EIncludeData include) { m_IncludeData = include; }
374 
GetIncludeData() const375     EIncludeData GetIncludeData() const { return m_IncludeData; }
376 
377 private:
x_GetType() const378     string x_GetType() const override { return "blob"; }
x_GetId() const379     string x_GetId() const override { return GetBlobId().Repr(); }
380     void x_GetAbsPathRef(ostream&) const override;
381 
382     CPSG_BlobId  m_BlobId;
383     EIncludeData m_IncludeData = EIncludeData::eDefault;
384 };
385 
386 
387 
388 /// Request meta-information for the named annotations which are defined on the
389 /// bioseq
390 ///
391 
392 class CPSG_Request_NamedAnnotInfo : public CPSG_Request
393 {
394 public:
395     /// Names of the named annotations
396     using TAnnotNames = vector<string>;
397 
398     /// @param bio_id
399     ///  ID of the bioseq
400     /// @param annot_names
401     ///  List of NAs for which to request the metainfo
CPSG_Request_NamedAnnotInfo(CPSG_BioId bio_id,TAnnotNames annot_names,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})402     CPSG_Request_NamedAnnotInfo(CPSG_BioId       bio_id,
403                                 TAnnotNames      annot_names,
404                                 shared_ptr<void> user_context = {},
405                                 CRef<CRequestContext> request_context = {})
406         : CPSG_Request(user_context, request_context),
407           m_BioId(bio_id),
408           m_AnnotNames(annot_names)
409     {}
410 
GetBioId() const411     const CPSG_BioId&  GetBioId()      const { return m_BioId;      }
GetAnnotNames() const412     const TAnnotNames& GetAnnotNames() const { return m_AnnotNames; }
413 
414     /// Set substitution policy for version-less primary seq-ids
SetAccSubstitution(EPSG_AccSubstitution acc_substitution)415     void SetAccSubstitution(EPSG_AccSubstitution acc_substitution) { m_AccSubstitution = acc_substitution; }
416 
417     /// Specify which data is needed (info is always returned)
418     using EIncludeData = CPSG_Request_Biodata::EIncludeData;
IncludeData(EIncludeData include)419     void IncludeData(EIncludeData include) { m_IncludeData = include; }
420 
GetIncludeData() const421     EIncludeData GetIncludeData() const { return m_IncludeData; }
422 
423 private:
x_GetType() const424     string x_GetType() const override { return "annot"; }
x_GetId() const425     string x_GetId() const override { return GetBioId().Repr(); }
426     void x_GetAbsPathRef(ostream&) const override;
427 
428     CPSG_BioId  m_BioId;
429     TAnnotNames m_AnnotNames;
430     EPSG_AccSubstitution m_AccSubstitution = EPSG_AccSubstitution::Default;
431     EIncludeData m_IncludeData = EIncludeData::eDefault;
432 };
433 
434 
435 
436 /// Request blob data chunk
437 ///
438 
439 class CPSG_Request_Chunk : public CPSG_Request
440 {
441 public:
CPSG_Request_Chunk(CPSG_ChunkId chunk_id,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})442     CPSG_Request_Chunk(CPSG_ChunkId          chunk_id,
443                        shared_ptr<void>      user_context = {},
444                        CRef<CRequestContext> request_context = {})
445         : CPSG_Request(move(user_context), move(request_context)),
446           m_ChunkId(move(chunk_id))
447     {}
448 
GetChunkId() const449     const CPSG_ChunkId& GetChunkId() const { return m_ChunkId; }
450 
451 private:
x_GetType() const452     string x_GetType() const override { return "chunk"; }
x_GetId() const453     string x_GetId() const override { return GetChunkId().Repr(); }
454     void x_GetAbsPathRef(ostream&) const override;
455 
456     CPSG_ChunkId m_ChunkId;
457 };
458 
459 
460 
/// Retrieval result
/// @sa GetStatus
enum class EPSG_Status {
    /// Successfully retrieved
    eSuccess,

    /// Retrieval is not finalized yet, more info may come
    eInProgress,

    /// Not found
    eNotFound,

    /// Request canceled
    eCanceled,

    /// An error was encountered while trying to send request or to read
    /// and to process the reply.
    /// If PSG server sends a message with severity:
    /// - Error, Critical or Fatal -- this status will be set, and any data
    ///   in the reply item must be considered invalid; such messages
    ///   will also be logged by the client API with severity Error.
    /// - Trace, Info or Warning -- are considered to be informational, so
    ///   these do NOT affect the status; such messages however will still
    ///   be logged by the client API with the same (T, I or W) severity.
    eError
};
480 
481 
482 
483 class CPSG_Reply;
484 
485 
486 
487 /// A self-containing part of the reply, e.g. a meta-data or a data blob.
488 
489 class CPSG_ReplyItem
490 {
491 public:
492     enum EType {
493         eBlobData,
494         eBlobInfo,
495         eSkippedBlob,
496         eBioseqInfo,
497         eNamedAnnotInfo,
498         ePublicComment,
499         eProcessor,
500         eEndOfReply,    ///< No more items expected in the (overall!) reply
501     };
502 
GetType() const503     EType GetType() const { return m_Type; }
504 
505     /// Get the final result of this blob's retrieval.
506     /// If the blob retrieval is not finalized by the deadline, then
507     /// "eInProgress" is returned.
508     EPSG_Status GetStatus(CDeadline deadline) const;
509 
510     /// Unstructured text containing auxiliary info about the result --
511     /// such as messages and errors that came from the PSG server or occured
512     /// while trying to send request or to read and to process the reply.
513     string GetNextMessage() const;
514 
515     /// Get the reply that contains this item
GetReply() const516     shared_ptr<CPSG_Reply> GetReply() const { return m_Reply; }
517 
518     /// Get processor ID
GetProcessorId()519     const string& GetProcessorId() { return m_ProcessorId; }
520 
521     virtual ~CPSG_ReplyItem();
522 
523 protected:
524     CPSG_ReplyItem(EType type);
525 
526 private:
527     struct SImpl;
528     unique_ptr<SImpl>      m_Impl;
529     shared_ptr<CPSG_Reply> m_Reply;
530     const EType            m_Type;
531     string                 m_ProcessorId;
532 
533     friend class CPSG_Reply;
534 };
535 
536 
537 
538 /// Blob data.
539 
540 class CPSG_BlobData : public CPSG_ReplyItem
541 {
542 public:
543     /// Get data ID
544     template <class TDataId = CPSG_DataId>
GetId() const545     const TDataId* GetId() const { return dynamic_cast<const TDataId*>(m_Id.get()); }
546 
547     /// Get the stream from which to read the item's content.
548     /// @note  If no content, then reading from the stream will result in EOF.
GetStream() const549     istream& GetStream() const { return *m_Stream; }
550 
551 private:
552     CPSG_BlobData(unique_ptr<CPSG_DataId> id);
553 
554     unique_ptr<CPSG_DataId> m_Id;
555     unique_ptr<istream> m_Stream;
556 
557     friend class CPSG_Reply;
558 };
559 
560 
561 
562 /// Blob data meta information
563 
564 class CPSG_BlobInfo : public CPSG_ReplyItem
565 {
566 public:
567     /// Get data ID
568     template <class TDataId = CPSG_DataId>
GetId() const569     const TDataId* GetId() const { return dynamic_cast<const TDataId*>(m_Id.get()); }
570 
571     /// Get data compression algorithm: gzip, bzip2, zip, compress, nlmzip, ...
572     /// Return empty string if the blob data is not compressed
573     string GetCompression() const;
574 
575     /// Get data serialization format:  asn.1, asn1-text, json, xml, ...
576     string GetFormat() const;
577 
578     /// Get size of the blob data (as it is stored)
579     Uint8 GetStorageSize() const;
580 
581     /// Get size of the real (before any compression or encryption) blob data
582     Uint8 GetSize() const;
583 
584     /// Return TRUE if the blob data is "dead"
585     bool IsDead() const;
586 
587     /// Return TRUE if the blob data is "suppressed"
588     bool IsSuppressed() const;
589 
590     /// Return TRUE if the blob data is "withdrawn"
591     bool IsWithdrawn() const;
592 
593     /// Date when the blob data will be released for public use.
594     /// If the blob data is already released, then return "empty" (IsEmpty()) time
595     CTime GetHupReleaseDate() const;
596 
597     /// Blob data owner's ID
598     Uint8 GetOwner() const;
599 
600     /// Date when the blob data was first loaded into the database
601     CTime GetOriginalLoadDate() const;
602 
603     /// Class of this blob data
604     objects::CBioseq_set::EClass GetClass() const;
605 
606     /// Internal division value (used by various dumpers)
607     string GetDivision() const;
608 
609     /// Name of the user who loaded this blob data
610     string GetUsername() const;
611 
612     /// Get ID2 info
613     string GetId2Info() const;
614 
615     /// Get number of chunks
616     Uint8 GetNChunks() const;
617 
618 private:
619     CPSG_BlobInfo(unique_ptr<CPSG_DataId> id);
620 
621     unique_ptr<CPSG_DataId> m_Id;
622     CJsonNode m_Data;
623 
624     friend class CPSG_Reply;
625 };
626 
627 
628 
629 /// Skipped blob.
630 
631 class CPSG_SkippedBlob : public CPSG_ReplyItem
632 {
633 public:
634     enum EReason {
635         eExcluded,   // Explicitly excluded by the client
636         eInProgress, // Is being sent to the client
637         eSent,       // Already sent to the client
638         eUnknown,    // Skipped for unknown reason
639     };
640 
641     /// Get blob ID
GetId() const642     const CPSG_BlobId& GetId() const { return m_Id; }
643 
644     // Get reason for blob skipping
GetReason() const645     EReason GetReason() const { return m_Reason; }
646 
647 private:
648     CPSG_SkippedBlob(CPSG_BlobId id, EReason reason);
649 
650     CPSG_BlobId m_Id;
651     EReason     m_Reason;
652 
653     friend class CPSG_Reply;
654 };
655 
656 
657 
658 /// Bio-sequence metainfo -- result of the bio-id resolution.
659 ///
660 /// It can be used to identify which data blobs (related to the requested
661 /// bio-id retrieval) server is sending right away. It also contains
662 /// resolution information as well as the information about which
663 /// other biodata-related blobs are also available on the server and how
664 /// they can be explicitly requested for later retrieval, if needed.
665 ///
666 /// @note
667 ///  Most of the data comes from table "BIOSEQ_INFO" and from the named
668 ///  annotation tables.
669 
670 class CPSG_BioseqInfo : public CPSG_ReplyItem
671 {
672 public:
673     /// Get canonical bio-id for the bioseq (usually "accession.version")
674     CPSG_BioId GetCanonicalId() const;
675 
676     /// Get non-canonical bio-ids (aliases) for the bioseq
677     vector<CPSG_BioId> GetOtherIds() const;
678 
679     /// The bioseq's molecule type (DNA, RNA, protein, etc)
680     objects::CSeq_inst::TMol GetMoleculeType() const;
681 
682     /// Length of bio-sequence
683     Uint8 GetLength() const;
684 
685     /// State of the bio-sequence's seq-id
686     enum EState {
687         eDead     =  0,
688         eSought   =  1,
689         eReserved =  5,
690         eMerged   =  7,
691         eLive     = 10
692     };
693     typedef int TState;  ///< @sa EState
694 
695     /// State of the bio-sequence's seq-id chain, i.e. the state of the very
696     /// latest seq-id in this bio-sequence's seq-id chain
697     TState GetChainState() const;
698 
699     /// State of this exact bio-sequence's seq-id.
700     /// I.e., for the latest seq-id in a chain it is equal to GetState(), and
701     /// for all other seq-ids in a chain it's zero (eDead).
702     TState GetState() const;
703 
704     /// Get coordinates of the TSE blob that contains the bioseq itself
705     CPSG_BlobId GetBlobId() const;
706 
707     /// Get the bioseq's taxonomy ID
708     TTaxId GetTaxId() const;
709 
710     /// Get the bioseq's (pre-calculated) hash
711     int GetHash() const;
712 
713     /// Date when the bioseq was changed last time
714     CTime GetDateChanged() const;
715 
716     /// Get GI
717     TGi GetGi() const;
718 
719     /// What data is immediately available now. Other data will require
720     /// a separate hit to the server.
721     /// @sa CPSG_Request_Resolve::IncludeInfo()
722     CPSG_Request_Resolve::TIncludeInfo IncludedInfo() const;
723 
724 private:
725     CPSG_BioseqInfo();
726 
727     CJsonNode m_Data;
728 
729     friend class CPSG_Reply;
730 };
731 
732 
733 
734 /// Named Annotations (NAs) metainfo -- reply to CPSG_Request_NamedAnnotInfo.
735 ///
736 /// It can be used to identify where various types of requested NAs are located
737 /// on the bioseq. It also provides information how to retrieve the
738 /// corresponding NA data blobs (as needed).
739 
740 class CPSG_NamedAnnotInfo : public CPSG_ReplyItem
741 {
742 public:
743     /// Name of the annotation
GetName() const744     const string& GetName() const { return m_Name; }
745 
746     /// Annotated bio-id
747     CPSG_BioId GetAnnotatedId() const;
748 
749     /// Range where the feature(s) from this NA appear on the bio-sequence
750     CRange<TSeqPos> GetRange() const;
751 
752     /// Coordinates of the blob that contains the NA data
753     CPSG_BlobId GetBlobId() const;
754 
755     /// Available zoom levels
756     using TZoomLevel  = unsigned int;
757     using TZoomLevels = vector<TZoomLevel>;
758     TZoomLevels GetZoomLevels() const;
759 
760     ///
761     struct SAnnotInfo
762     {
763         using TAnnotType = objects::CSeq_annot::C_Data::E_Choice;
764 
765         TAnnotType annot_type;
766         int        feat_type;
767         int        feat_subtype;
768     };
769 
770     using TAnnotInfoList = list<SAnnotInfo>;
771     TAnnotInfoList GetAnnotInfoList() const;
772 
773     /// Base64 encoded asn.1 of ID2-Seq-annot-Info
774     string GetId2AnnotInfo() const;
775 
776     /// Detailed ID2-Seq-annot-Info structures (from GetId2AnnotInfo, decoded)
777     /// @sa GetId2AnnotInfo
778     /// @{
779     using TId2AnnotInfo = objects::CID2S_Seq_annot_Info;
780     using TId2AnnotInfoList = list<CRef<TId2AnnotInfo>>;
781     TId2AnnotInfoList GetId2AnnotInfoList() const;
782     /// @}
783 
784 private:
785     CPSG_NamedAnnotInfo(string name);
786 
787     string     m_Name;
788     CJsonNode  m_Data;
789 
790     friend class CPSG_Reply;
791 };
792 
793 
794 
795 /// Public comment
796 
797 class CPSG_PublicComment : public CPSG_ReplyItem
798 {
799 public:
800     /// Get data ID for this public comment
801     template <class TDataId = CPSG_DataId>
GetId() const802     const TDataId* GetId() const { return dynamic_cast<const TDataId*>(m_Id.get()); }
803 
804     /// Get text
GetText() const805     const string& GetText() const { return m_Text; }
806 
807 private:
808     CPSG_PublicComment(unique_ptr<CPSG_DataId> id, string text);
809 
810     unique_ptr<CPSG_DataId> m_Id;
811     string m_Text;
812 
813     friend class CPSG_Reply;
814 };
815 
816 
817 
818 /// PSG reply -- corresponds to a PSG request. It is used to retrieve data
819 /// (accession resolution; bio-sequence; annotation blobs) from the storage.
820 ///
821 /// Reply may contain:
822 ///  - Reply items (CPSG_ReplyItem), each of which in turn may contain
823 ///    item-specific info and/or data blob
824 ///  - Server messages related to the whole reply
825 ///
826 
827 class CPSG_Reply
828 {
829 public:
830     /// Get the final result of this whole reply's retrieval.
831     /// If the reply retrieval is not finalized by the deadline, then
832     /// "eInProgress" is returned.
833     EPSG_Status GetStatus(CDeadline deadline) const;
834 
835     /// Unstructured text containing auxiliary info about the result --
836     /// such as messages and errors that came from the PSG server or occured
837     /// while trying to send request or to read and to process the reply.
838     string GetNextMessage() const;
839 
840     /// Get the request that resulted in this reply
GetRequest() const841     shared_ptr<const CPSG_Request> GetRequest() const { return m_Request; }
842 
843     /// Get the next item which has started arriving from the server.
844     /// @note
845     ///  Some of the item's data may still be in transit or not even sent
846     ///  in by the server yet.
847     /// @param deadline
848     ///  Until what time to wait for the next item to start coming in.
849     /// @return
850     ///  - The item objects from which you can start reading data
851     ///  - If no more items expected in the reply, the returned item will have
852     ///    type eEndOfReply
853     ///  - On expired timeout, the returned pointer will be empty (nullptr)
854     /// @throw
855     ///  If an error has been detected.
856     shared_ptr<CPSG_ReplyItem> GetNextItem(CDeadline deadline);
857 
858     ~CPSG_Reply();
859 
860 private:
861     CPSG_Reply();
862 
863     struct SImpl;
864     unique_ptr<SImpl>              m_Impl;
865     shared_ptr<const CPSG_Request> m_Request;
866 
867     friend class CPSG_Queue;
868 };
869 
870 
871 
872 /// A queue to retrieve data (accession resolution info; bio-sequence;
873 /// annotation blobs) from the storage.
874 ///
875 /// Call SendRequest() to schedule retrievals (by their bio-ids or
876 /// blob-ids). Then, call GetNextReply() to get the next reply whose data
877 /// has started coming in.
878 ///
879 /// All methods are MT-safe.  Data from different replies can be read in
880 /// parallel.
881 ///
882 /// The queue object can be used from more than one thread, either to push
883 /// requests or to get the incoming ready-to-be-retrieved replies.
884 ///
885 /// Results for the requests which were pushed into a given instance of
886 /// the queue will be available for retrieval using this (and only this) queue
887 /// instance regardless of which threads were used to push the request to the
888 /// queue.
889 ///
890 /// If more than one request was pushed into the queue, then the replies to all
891 /// of the requests may come, in any order.
892 ///
893 
894 class CPSG_Queue
895 {
896 public:
897     /// Creates an uninitialized instance.
898     /// It allows to postpone queue initialization until later.
899     /// The uninitialized instances can then be initialized using
900     /// regular constructor and move assignment operator.
901     CPSG_Queue();
902 
903     /// @param service
904     ///  Either a name of service (which can be resolved into a set of PSG
905     ///  servers) or a single fixed PSG server (in format "host:port")
906     CPSG_Queue(const string& service);
907     ~CPSG_Queue();
908 
909     /// Push request into the queue.
910     /// @param request
911     ///  The request (containing either bio- or blob-id to retrieve) to send.
912     /// @param deadline
913     ///  For how long to try to push the request into the queue.
914     /// @return
915     ///  - TRUE if it succeeds in pushing the request into the queue
916     ///  - FALSE on timeout (ie. if cannot do it before the specified deadline)
917     /// @throw  CPSG_Exception
918     ///  If any (non-timeout) error condition occures.
919     /// @sa Get()
920     bool SendRequest(shared_ptr<CPSG_Request> request,
921                      CDeadline                deadline);
922 
923 
924     /// Get the next reply which has started arriving from the server.
925     /// @param deadline
926     ///  Until what time to wait for the next reply to start coming in.
927     /// @return
928     ///  - Reply object from which you can obtain particular items.
929     ///  - On expired timeout, the returned pointer will be empty (nullptr).
930     /// @throw
931     ///  If an error has been detected.
932     shared_ptr<CPSG_Reply> GetNextReply(CDeadline deadline);
933 
934 
935     /// Stop accepting new requests.
936     /// All already accepted requests will be processed as usual.
937     /// No requests are accepted after the stop.
938     void Stop();
939 
940 
941     /// Stop accepting new requests and
942     /// cancel all requests whose replies have not been returned yet.
943     /// No requests are accepted and no replies are returned after the reset.
944     void Reset();
945 
946 
947     /// Check whether the queue was stopped/reset and is now empty.
948     bool IsEmpty() const;
949 
950 
951     /// Check whether the queue has been initialized.
IsInitialized() const952     bool IsInitialized() const { return static_cast<bool>(m_Impl); }
953 
954 
955     /// Is the queue in a state (possibly temporary) when requests get immediately rejected.
956     bool RejectsRequests() const;
957 
958 
959     /// Get an API lock.
960     /// Holding this API lock is essential if numerous short-lived queue instances are used.
961     /// It prevents an internal I/O implementation (threads, TCP connections, HTTP sessions, etc)
962     /// from being destroyed (on destroying last remaining queue instance)
963     /// and then re-created (with new queue instance).
964     using TApiLock = shared_ptr<void>;
965     static TApiLock GetApiLock();
966 
967 
968     CPSG_Queue(CPSG_Queue&&);
969     CPSG_Queue& operator=(CPSG_Queue&&);
970 
971 private:
972     struct SImpl;
973     unique_ptr<SImpl> m_Impl;
974 };
975 
976 
977 DECLARE_SAFE_FLAGS(CPSG_Request_Resolve::EIncludeInfo);
978 
979 END_NCBI_SCOPE
980 
981 
982 #endif  /* HAVE_PSG_CLIENT */
983 #endif  /* OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP */
984