1 #ifndef OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP 2 #define OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP 3 4 /* $Id: psg_client.hpp 628921 2021-04-07 18:46:41Z ivanov $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Authors: Denis Vakatov (design), Rafael Sadyrov (implementation) 30 * 31 */ 32 33 #include <corelib/ncbimisc.hpp> 34 #include <corelib/ncbitime.hpp> 35 #include <corelib/ncbi_url.hpp> 36 #include <corelib/request_ctx.hpp> 37 #include <connect/services/json_over_uttp.hpp> 38 #include <objects/seq/Seq_inst.hpp> 39 #include <objects/seqloc/Seq_loc.hpp> 40 #include <objects/seqset/Bioseq_set.hpp> 41 #include <objects/seq/Seq_annot.hpp> 42 #include <objects/seqsplit/ID2S_Seq_annot_Info.hpp> 43 44 45 #if defined(NCBI_THREADS) && defined(HAVE_LIBNGHTTP2) && defined(HAVE_LIBUV) 46 # define HAVE_PSG_CLIENT 1 47 #endif 48 49 50 #if defined(HAVE_PSG_CLIENT) 51 BEGIN_NCBI_SCOPE 52 53 54 55 class CPSG_Exception : public CException 56 { 57 public: 58 enum EErrCode { 59 eTimeout, 60 eServerError, 61 eInternalError, 62 eParameterMissing 63 }; 64 65 virtual const char* GetErrCodeString(void) const override; 66 67 NCBI_EXCEPTION_DEFAULT(CPSG_Exception, CException); 68 }; 69 70 71 72 /// Request to the PSG server (see "CPSG_Request_*" below) 73 /// 74 75 class CPSG_Request 76 { 77 public: 78 /// Get the user-provided context 79 template<typename TUserContext> GetUserContext() const80 shared_ptr<TUserContext> GetUserContext() const 81 { return static_pointer_cast<TUserContext>(m_UserContext); } 82 83 /// Get request context GetRequestContext() const84 CRef<CRequestContext> GetRequestContext() const { return m_RequestContext; } 85 86 /// Get request type GetType() const87 string GetType() const { return x_GetType(); } 88 89 // Get request ID GetId() const90 string GetId() const { return x_GetId(); } 91 92 /// Set hops SetHops(unsigned hops)93 void SetHops(unsigned hops) { m_Hops = hops; } 94 95 protected: CPSG_Request(shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})96 CPSG_Request(shared_ptr<void> user_context = {}, 97 CRef<CRequestContext> request_context = {}) 98 : m_UserContext(user_context), 99 m_RequestContext(request_context) 100 {} 101 102 virtual ~CPSG_Request() = default; 103 104 private: 105 virtual string x_GetType() const = 0; 106 virtual string x_GetId() const = 0; 107 virtual void x_GetAbsPathRef(ostream&) const = 0; 108 109 shared_ptr<void> m_UserContext; 110 CRef<CRequestContext> m_RequestContext; 111 unsigned m_Hops = 0; 112 113 friend class CPSG_Queue; 114 }; 115 116 117 118 /// Bio-id (such as accession) 119 /// 120 class CPSG_BioId 121 { 122 public: 123 using TType = objects::CSeq_id::E_Choice; 124 125 /// @param id 126 /// Bio ID (like accession) CPSG_BioId(string id,TType type={})127 CPSG_BioId(string id, TType type = {}) : m_Id(move(id)), m_Type(type) {} 128 129 /// Get tilde-separated string representation of this bio ID (e.g. for logging) 130 string Repr() const; 131 132 /// Get ID GetId() const133 const string& GetId() const { return m_Id; } 134 135 /// Get type GetType() const136 TType GetType() const { return m_Type; } 137 138 private: 139 string m_Id; 140 TType m_Type; 141 }; 142 143 144 145 /// Blob data unique ID 146 /// 147 class CPSG_DataId 148 { 149 public: 150 virtual ~CPSG_DataId() = default; 151 152 /// Get tilde-separated string representation of this data ID (e.g. for logging) 153 virtual string Repr() const = 0; 154 }; 155 156 157 158 /// Blob unique ID 159 /// 160 class CPSG_BlobId : public CPSG_DataId 161 { 162 public: 163 using TLastModified = CNullable<Int8>; 164 165 /// Mainstream blob ID ctor - from a string ID 166 /// @param id 167 /// Blob ID CPSG_BlobId(string id,TLastModified last_modified={})168 CPSG_BlobId(string id, TLastModified last_modified = {}) 169 : m_Id(move(id)), 170 m_LastModified(move(last_modified)) 171 {} 172 173 /// Historical blob ID system -- based on the "satellite" and the "key" 174 /// inside it. It'll be translated into "<sat>.<sat_key>" string. 175 /// @sa objects::CID2_Blob_Id::TSat, objects::CID2_Blob_Id::TSat_key CPSG_BlobId(int sat,int sat_key,TLastModified last_modified={})176 CPSG_BlobId(int sat, int sat_key, TLastModified last_modified = {}) 177 : m_Id(to_string(sat) + "." + to_string(sat_key)), 178 m_LastModified(move(last_modified)) 179 {} 180 181 /// Get tilde-separated string representation of this blob ID (e.g. for logging) 182 string Repr() const override; 183 184 /// Get ID GetId() const185 const string& GetId() const { return m_Id; } 186 187 /// Get last modified GetLastModified() const188 const TLastModified& GetLastModified() const { return m_LastModified; } 189 190 private: 191 string m_Id; 192 TLastModified m_LastModified; 193 }; 194 195 196 197 /// Chunk unique ID 198 /// 199 class CPSG_ChunkId : public CPSG_DataId 200 { 201 public: CPSG_ChunkId(Uint8 id2_chunk,string id2_info)202 CPSG_ChunkId(Uint8 id2_chunk, string id2_info) 203 : m_Id2Chunk(id2_chunk), 204 m_Id2Info(move(id2_info)) 205 {} 206 207 /// Get tilde-separated string representation of this chunk ID (e.g. for logging) 208 string Repr() const override; 209 210 /// Get ID2 chunk number GetId2Chunk() const211 Uint8 GetId2Chunk() const { return m_Id2Chunk; } 212 213 /// Get ID2 info GetId2Info() const214 const string& GetId2Info() const { return m_Id2Info; } 215 216 private: 217 Uint8 m_Id2Chunk; 218 string m_Id2Info; 219 }; 220 221 222 223 /// Whether and how to substitute version-less primary seq-ids with 224 /// the "more unique" secondary seq-ids 225 enum class EPSG_AccSubstitution { 226 Default, ///< Substitute always (default) 227 Limited, ///< Substitute only if the resolved record's seq_id_type is GI(12) 228 Never ///< No substitution whatsoever - return exact raw accession info 229 }; 230 231 232 233 /// Request to the PSG server (by bio-id, for a biodata specific info and data) 234 /// 235 236 class CPSG_Request_Biodata : public CPSG_Request 237 { 238 public: 239 /// CPSG_Request_Biodata(CPSG_BioId bio_id,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})240 CPSG_Request_Biodata(CPSG_BioId bio_id, 241 shared_ptr<void> user_context = {}, 242 CRef<CRequestContext> request_context = {}) 243 : CPSG_Request(user_context, request_context), 244 m_BioId(bio_id) 245 {} 246 GetBioId() const247 const CPSG_BioId& GetBioId() const { return m_BioId; } 248 249 /// Specify which info and data is needed 250 enum EIncludeData { 251 /// Server default 252 eDefault, 253 254 /// Only the info 255 eNoTSE, 256 257 /// If ID2 split is available, return split info blob only. 258 /// Otherwise, return no data. 259 eSlimTSE, 260 261 /// If ID2 split is available, return split info blob only. 262 /// Otherwise, return all Cassandra data chunks of the blob itself. 263 eSmartTSE, 264 265 /// If ID2 split is available, return all split blobs. 266 /// Otherwise, return all Cassandra data chunks of the blob itself. 267 eWholeTSE, 268 269 /// Return all Cassandra data chunks of the blob itself. 270 eOrigTSE 271 }; IncludeData(EIncludeData include)272 void IncludeData(EIncludeData include) { m_IncludeData = include; } 273 GetIncludeData() const274 EIncludeData GetIncludeData() const { return m_IncludeData; } 275 276 using TExcludeTSEs = vector<CPSG_BlobId>; 277 ExcludeTSE(CPSG_BlobId blob_id)278 void ExcludeTSE(CPSG_BlobId blob_id) { m_ExcludeTSEs.emplace_back(move(blob_id)); } 279 GetExcludeTSEs() const280 const TExcludeTSEs& GetExcludeTSEs() const { return m_ExcludeTSEs; } 281 282 /// Set substitution policy for version-less primary seq-ids SetAccSubstitution(EPSG_AccSubstitution acc_substitution)283 void SetAccSubstitution(EPSG_AccSubstitution acc_substitution) { m_AccSubstitution = acc_substitution; } 284 285 /// Enable/disable auto blob skipping on server for this request SetAutoBlobSkipping(bool auto_blob_skipping)286 void SetAutoBlobSkipping(bool auto_blob_skipping) { m_AutoBlobSkipping = auto_blob_skipping ? eOn : eOff; } 287 288 private: x_GetType() const289 string x_GetType() const override { return "biodata"; } x_GetId() const290 string x_GetId() const override { return GetBioId().Repr(); } 291 void x_GetAbsPathRef(ostream&) const override; 292 293 CPSG_BioId m_BioId; 294 EIncludeData m_IncludeData = EIncludeData::eDefault; 295 TExcludeTSEs m_ExcludeTSEs; 296 EPSG_AccSubstitution m_AccSubstitution = EPSG_AccSubstitution::Default; 297 ESwitch m_AutoBlobSkipping = ESwitch::eDefault; 298 }; 299 300 301 302 /// Request to the PSG server (by bio-id, for a biodata specific info and data) 303 /// 304 305 class CPSG_Request_Resolve : public CPSG_Request 306 { 307 public: 308 /// CPSG_Request_Resolve(CPSG_BioId bio_id,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})309 CPSG_Request_Resolve(CPSG_BioId bio_id, 310 shared_ptr<void> user_context = {}, 311 CRef<CRequestContext> request_context = {}) 312 : CPSG_Request(user_context, request_context), 313 m_BioId(bio_id) 314 {} 315 GetBioId() const316 const CPSG_BioId& GetBioId() const { return m_BioId; } 317 318 /// Specify which info and data is needed 319 enum EIncludeInfo : unsigned { 320 // These flags correspond exactly to the CPSG_BioseqInfo's getters 321 fCanonicalId = (1 << 1), 322 fName = (1 << 2), ///< Requests name to use for canonical bio-id 323 fOtherIds = (1 << 3), 324 fMoleculeType = (1 << 4), 325 fLength = (1 << 5), 326 fChainState = (1 << 6), 327 fState = (1 << 7), 328 fBlobId = (1 << 8), 329 fTaxId = (1 << 9), 330 fHash = (1 << 10), 331 fDateChanged = (1 << 11), 332 fGi = (1 << 12), 333 fAllInfo = numeric_limits<unsigned>::max() 334 }; 335 DECLARE_SAFE_FLAGS_TYPE(EIncludeInfo, TIncludeInfo); IncludeInfo(TIncludeInfo include)336 void IncludeInfo(TIncludeInfo include) { m_IncludeInfo = include; } 337 GetIncludeInfo() const338 TIncludeInfo GetIncludeInfo() const { return m_IncludeInfo; } 339 340 /// Set substitution policy for version-less primary seq-ids SetAccSubstitution(EPSG_AccSubstitution acc_substitution)341 void SetAccSubstitution(EPSG_AccSubstitution acc_substitution) { m_AccSubstitution = acc_substitution; } 342 343 private: x_GetType() const344 string x_GetType() const override { return "resolve"; } x_GetId() const345 string x_GetId() const override { return GetBioId().Repr(); } 346 void x_GetAbsPathRef(ostream&) const override; 347 348 CPSG_BioId m_BioId; 349 TIncludeInfo m_IncludeInfo = TIncludeInfo(0); 350 EPSG_AccSubstitution m_AccSubstitution = EPSG_AccSubstitution::Default; 351 }; 352 353 354 355 /// Request to the PSG server (by blob-id, for a particular blob of data) 356 /// 357 358 class CPSG_Request_Blob : public CPSG_Request 359 { 360 public: 361 /// CPSG_Request_Blob(CPSG_BlobId blob_id,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})362 CPSG_Request_Blob(CPSG_BlobId blob_id, 363 shared_ptr<void> user_context = {}, 364 CRef<CRequestContext> request_context = {}) 365 : CPSG_Request(move(user_context), move(request_context)), 366 m_BlobId(move(blob_id)) 367 {} 368 GetBlobId() const369 const CPSG_BlobId& GetBlobId() const { return m_BlobId; } 370 371 /// Specify which data is needed (info is always returned) 372 using EIncludeData = CPSG_Request_Biodata::EIncludeData; IncludeData(EIncludeData include)373 void IncludeData(EIncludeData include) { m_IncludeData = include; } 374 GetIncludeData() const375 EIncludeData GetIncludeData() const { return m_IncludeData; } 376 377 private: x_GetType() const378 string x_GetType() const override { return "blob"; } x_GetId() const379 string x_GetId() const override { return GetBlobId().Repr(); } 380 void x_GetAbsPathRef(ostream&) const override; 381 382 CPSG_BlobId m_BlobId; 383 EIncludeData m_IncludeData = EIncludeData::eDefault; 384 }; 385 386 387 388 /// Request meta-information for the named annotations which are defined on the 389 /// bioseq 390 /// 391 392 class CPSG_Request_NamedAnnotInfo : public CPSG_Request 393 { 394 public: 395 /// Names of the named annotations 396 using TAnnotNames = vector<string>; 397 398 /// @param bio_id 399 /// ID of the bioseq 400 /// @param annot_names 401 /// List of NAs for which to request the metainfo CPSG_Request_NamedAnnotInfo(CPSG_BioId bio_id,TAnnotNames annot_names,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})402 CPSG_Request_NamedAnnotInfo(CPSG_BioId bio_id, 403 TAnnotNames annot_names, 404 shared_ptr<void> user_context = {}, 405 CRef<CRequestContext> request_context = {}) 406 : CPSG_Request(user_context, request_context), 407 m_BioId(bio_id), 408 m_AnnotNames(annot_names) 409 {} 410 GetBioId() const411 const CPSG_BioId& GetBioId() const { return m_BioId; } GetAnnotNames() const412 const TAnnotNames& GetAnnotNames() const { return m_AnnotNames; } 413 414 /// Set substitution policy for version-less primary seq-ids SetAccSubstitution(EPSG_AccSubstitution acc_substitution)415 void SetAccSubstitution(EPSG_AccSubstitution acc_substitution) { m_AccSubstitution = acc_substitution; } 416 417 /// Specify which data is needed (info is always returned) 418 using EIncludeData = CPSG_Request_Biodata::EIncludeData; IncludeData(EIncludeData include)419 void IncludeData(EIncludeData include) { m_IncludeData = include; } 420 GetIncludeData() const421 EIncludeData GetIncludeData() const { return m_IncludeData; } 422 423 private: x_GetType() const424 string x_GetType() const override { return "annot"; } x_GetId() const425 string x_GetId() const override { return GetBioId().Repr(); } 426 void x_GetAbsPathRef(ostream&) const override; 427 428 CPSG_BioId m_BioId; 429 TAnnotNames m_AnnotNames; 430 EPSG_AccSubstitution m_AccSubstitution = EPSG_AccSubstitution::Default; 431 EIncludeData m_IncludeData = EIncludeData::eDefault; 432 }; 433 434 435 436 /// Request blob data chunk 437 /// 438 439 class CPSG_Request_Chunk : public CPSG_Request 440 { 441 public: CPSG_Request_Chunk(CPSG_ChunkId chunk_id,shared_ptr<void> user_context={},CRef<CRequestContext> request_context={})442 CPSG_Request_Chunk(CPSG_ChunkId chunk_id, 443 shared_ptr<void> user_context = {}, 444 CRef<CRequestContext> request_context = {}) 445 : CPSG_Request(move(user_context), move(request_context)), 446 m_ChunkId(move(chunk_id)) 447 {} 448 GetChunkId() const449 const CPSG_ChunkId& GetChunkId() const { return m_ChunkId; } 450 451 private: x_GetType() const452 string x_GetType() const override { return "chunk"; } x_GetId() const453 string x_GetId() const override { return GetChunkId().Repr(); } 454 void x_GetAbsPathRef(ostream&) const override; 455 456 CPSG_ChunkId m_ChunkId; 457 }; 458 459 460 461 /// Retrieval result 462 /// @sa GetStatus 463 enum class EPSG_Status { 464 eSuccess, ///< Successfully retrieved 465 eInProgress, ///< Retrieval is not finalized yet, more info may come 466 eNotFound, ///< Not found 467 eCanceled, ///< Request canceled 468 469 /// An error was encountered while trying to send request or to read 470 /// and to process the reply. 471 /// If PSG server sends a message with severity: 472 /// - Error, Critical or Fatal -- this status will be set, and any data 473 /// data in the reply item must be considered invalid; such messages 474 /// will also be logged by the client API with severity Error. 475 /// - Trace, Info or Warning -- are considered to be informational, so 476 /// these do NOT affect the status; such messages however will still 477 /// be logged by the client API with the same (T, I or W) severity. 478 eError 479 }; 480 481 482 483 class CPSG_Reply; 484 485 486 487 /// A self-containing part of the reply, e.g. a meta-data or a data blob. 488 489 class CPSG_ReplyItem 490 { 491 public: 492 enum EType { 493 eBlobData, 494 eBlobInfo, 495 eSkippedBlob, 496 eBioseqInfo, 497 eNamedAnnotInfo, 498 ePublicComment, 499 eProcessor, 500 eEndOfReply, ///< No more items expected in the (overall!) reply 501 }; 502 GetType() const503 EType GetType() const { return m_Type; } 504 505 /// Get the final result of this blob's retrieval. 506 /// If the blob retrieval is not finalized by the deadline, then 507 /// "eInProgress" is returned. 508 EPSG_Status GetStatus(CDeadline deadline) const; 509 510 /// Unstructured text containing auxiliary info about the result -- 511 /// such as messages and errors that came from the PSG server or occured 512 /// while trying to send request or to read and to process the reply. 513 string GetNextMessage() const; 514 515 /// Get the reply that contains this item GetReply() const516 shared_ptr<CPSG_Reply> GetReply() const { return m_Reply; } 517 518 /// Get processor ID GetProcessorId()519 const string& GetProcessorId() { return m_ProcessorId; } 520 521 virtual ~CPSG_ReplyItem(); 522 523 protected: 524 CPSG_ReplyItem(EType type); 525 526 private: 527 struct SImpl; 528 unique_ptr<SImpl> m_Impl; 529 shared_ptr<CPSG_Reply> m_Reply; 530 const EType m_Type; 531 string m_ProcessorId; 532 533 friend class CPSG_Reply; 534 }; 535 536 537 538 /// Blob data. 539 540 class CPSG_BlobData : public CPSG_ReplyItem 541 { 542 public: 543 /// Get data ID 544 template <class TDataId = CPSG_DataId> GetId() const545 const TDataId* GetId() const { return dynamic_cast<const TDataId*>(m_Id.get()); } 546 547 /// Get the stream from which to read the item's content. 548 /// @note If no content, then reading from the stream will result in EOF. GetStream() const549 istream& GetStream() const { return *m_Stream; } 550 551 private: 552 CPSG_BlobData(unique_ptr<CPSG_DataId> id); 553 554 unique_ptr<CPSG_DataId> m_Id; 555 unique_ptr<istream> m_Stream; 556 557 friend class CPSG_Reply; 558 }; 559 560 561 562 /// Blob data meta information 563 564 class CPSG_BlobInfo : public CPSG_ReplyItem 565 { 566 public: 567 /// Get data ID 568 template <class TDataId = CPSG_DataId> GetId() const569 const TDataId* GetId() const { return dynamic_cast<const TDataId*>(m_Id.get()); } 570 571 /// Get data compression algorithm: gzip, bzip2, zip, compress, nlmzip, ... 572 /// Return empty string if the blob data is not compressed 573 string GetCompression() const; 574 575 /// Get data serialization format: asn.1, asn1-text, json, xml, ... 576 string GetFormat() const; 577 578 /// Get size of the blob data (as it is stored) 579 Uint8 GetStorageSize() const; 580 581 /// Get size of the real (before any compression or encryption) blob data 582 Uint8 GetSize() const; 583 584 /// Return TRUE if the blob data is "dead" 585 bool IsDead() const; 586 587 /// Return TRUE if the blob data is "suppressed" 588 bool IsSuppressed() const; 589 590 /// Return TRUE if the blob data is "withdrawn" 591 bool IsWithdrawn() const; 592 593 /// Date when the blob data will be released for public use. 594 /// If the blob data is already released, then return "empty" (IsEmpty()) time 595 CTime GetHupReleaseDate() const; 596 597 /// Blob data owner's ID 598 Uint8 GetOwner() const; 599 600 /// Date when the blob data was first loaded into the database 601 CTime GetOriginalLoadDate() const; 602 603 /// Class of this blob data 604 objects::CBioseq_set::EClass GetClass() const; 605 606 /// Internal division value (used by various dumpers) 607 string GetDivision() const; 608 609 /// Name of the user who loaded this blob data 610 string GetUsername() const; 611 612 /// Get ID2 info 613 string GetId2Info() const; 614 615 /// Get number of chunks 616 Uint8 GetNChunks() const; 617 618 private: 619 CPSG_BlobInfo(unique_ptr<CPSG_DataId> id); 620 621 unique_ptr<CPSG_DataId> m_Id; 622 CJsonNode m_Data; 623 624 friend class CPSG_Reply; 625 }; 626 627 628 629 /// Skipped blob. 630 631 class CPSG_SkippedBlob : public CPSG_ReplyItem 632 { 633 public: 634 enum EReason { 635 eExcluded, // Explicitly excluded by the client 636 eInProgress, // Is being sent to the client 637 eSent, // Already sent to the client 638 eUnknown, // Skipped for unknown reason 639 }; 640 641 /// Get blob ID GetId() const642 const CPSG_BlobId& GetId() const { return m_Id; } 643 644 // Get reason for blob skipping GetReason() const645 EReason GetReason() const { return m_Reason; } 646 647 private: 648 CPSG_SkippedBlob(CPSG_BlobId id, EReason reason); 649 650 CPSG_BlobId m_Id; 651 EReason m_Reason; 652 653 friend class CPSG_Reply; 654 }; 655 656 657 658 /// Bio-sequence metainfo -- result of the bio-id resolution. 659 /// 660 /// It can be used to identify which data blobs (related to the requested 661 /// bio-id retrieval) server is sending right away. It also contains 662 /// resolution information as well as the information about which 663 /// other biodata-related blobs are also available on the server and how 664 /// they can be explicitly requested for later retrieval, if needed. 665 /// 666 /// @note 667 /// Most of the data comes from table "BIOSEQ_INFO" and from the named 668 /// annotation tables. 669 670 class CPSG_BioseqInfo : public CPSG_ReplyItem 671 { 672 public: 673 /// Get canonical bio-id for the bioseq (usually "accession.version") 674 CPSG_BioId GetCanonicalId() const; 675 676 /// Get non-canonical bio-ids (aliases) for the bioseq 677 vector<CPSG_BioId> GetOtherIds() const; 678 679 /// The bioseq's molecule type (DNA, RNA, protein, etc) 680 objects::CSeq_inst::TMol GetMoleculeType() const; 681 682 /// Length of bio-sequence 683 Uint8 GetLength() const; 684 685 /// State of the bio-sequence's seq-id 686 enum EState { 687 eDead = 0, 688 eSought = 1, 689 eReserved = 5, 690 eMerged = 7, 691 eLive = 10 692 }; 693 typedef int TState; ///< @sa EState 694 695 /// State of the bio-sequence's seq-id chain, i.e. the state of the very 696 /// latest seq-id in this bio-sequence's seq-id chain 697 TState GetChainState() const; 698 699 /// State of this exact bio-sequence's seq-id. 700 /// I.e., for the latest seq-id in a chain it is equal to GetState(), and 701 /// for all other seq-ids in a chain it's zero (eDead). 702 TState GetState() const; 703 704 /// Get coordinates of the TSE blob that contains the bioseq itself 705 CPSG_BlobId GetBlobId() const; 706 707 /// Get the bioseq's taxonomy ID 708 TTaxId GetTaxId() const; 709 710 /// Get the bioseq's (pre-calculated) hash 711 int GetHash() const; 712 713 /// Date when the bioseq was changed last time 714 CTime GetDateChanged() const; 715 716 /// Get GI 717 TGi GetGi() const; 718 719 /// What data is immediately available now. Other data will require 720 /// a separate hit to the server. 721 /// @sa CPSG_Request_Resolve::IncludeInfo() 722 CPSG_Request_Resolve::TIncludeInfo IncludedInfo() const; 723 724 private: 725 CPSG_BioseqInfo(); 726 727 CJsonNode m_Data; 728 729 friend class CPSG_Reply; 730 }; 731 732 733 734 /// Named Annotations (NAs) metainfo -- reply to CPSG_Request_NamedAnnotInfo. 735 /// 736 /// It can be used to identify where various types of requested NAs are located 737 /// on the bioseq. It also provides information how to retrieve the 738 /// corresponding NA data blobs (as needed). 739 740 class CPSG_NamedAnnotInfo : public CPSG_ReplyItem 741 { 742 public: 743 /// Name of the annotation GetName() const744 const string& GetName() const { return m_Name; } 745 746 /// Annotated bio-id 747 CPSG_BioId GetAnnotatedId() const; 748 749 /// Range where the feature(s) from this NA appear on the bio-sequence 750 CRange<TSeqPos> GetRange() const; 751 752 /// Coordinates of the blob that contains the NA data 753 CPSG_BlobId GetBlobId() const; 754 755 /// Available zoom levels 756 using TZoomLevel = unsigned int; 757 using TZoomLevels = vector<TZoomLevel>; 758 TZoomLevels GetZoomLevels() const; 759 760 /// 761 struct SAnnotInfo 762 { 763 using TAnnotType = objects::CSeq_annot::C_Data::E_Choice; 764 765 TAnnotType annot_type; 766 int feat_type; 767 int feat_subtype; 768 }; 769 770 using TAnnotInfoList = list<SAnnotInfo>; 771 TAnnotInfoList GetAnnotInfoList() const; 772 773 /// Base64 encoded asn.1 of ID2-Seq-annot-Info 774 string GetId2AnnotInfo() const; 775 776 /// Detailed ID2-Seq-annot-Info structures (from GetId2AnnotInfo, decoded) 777 /// @sa GetId2AnnotInfo 778 /// @{ 779 using TId2AnnotInfo = objects::CID2S_Seq_annot_Info; 780 using TId2AnnotInfoList = list<CRef<TId2AnnotInfo>>; 781 TId2AnnotInfoList GetId2AnnotInfoList() const; 782 /// @} 783 784 private: 785 CPSG_NamedAnnotInfo(string name); 786 787 string m_Name; 788 CJsonNode m_Data; 789 790 friend class CPSG_Reply; 791 }; 792 793 794 795 /// Public comment 796 797 class CPSG_PublicComment : public CPSG_ReplyItem 798 { 799 public: 800 /// Get data ID for this public comment 801 template <class TDataId = CPSG_DataId> GetId() const802 const TDataId* GetId() const { return dynamic_cast<const TDataId*>(m_Id.get()); } 803 804 /// Get text GetText() const805 const string& GetText() const { return m_Text; } 806 807 private: 808 CPSG_PublicComment(unique_ptr<CPSG_DataId> id, string text); 809 810 unique_ptr<CPSG_DataId> m_Id; 811 string m_Text; 812 813 friend class CPSG_Reply; 814 }; 815 816 817 818 /// PSG reply -- corresponds to a PSG request. It is used to retrieve data 819 /// (accession resolution; bio-sequence; annotation blobs) from the storage. 820 /// 821 /// Reply may contain: 822 /// - Reply items (CPSG_ReplyItem), each of which in turn may contain 823 /// item-specific info and/or data blob 824 /// - Server messages related to the whole reply 825 /// 826 827 class CPSG_Reply 828 { 829 public: 830 /// Get the final result of this whole reply's retrieval. 831 /// If the reply retrieval is not finalized by the deadline, then 832 /// "eInProgress" is returned. 833 EPSG_Status GetStatus(CDeadline deadline) const; 834 835 /// Unstructured text containing auxiliary info about the result -- 836 /// such as messages and errors that came from the PSG server or occured 837 /// while trying to send request or to read and to process the reply. 838 string GetNextMessage() const; 839 840 /// Get the request that resulted in this reply GetRequest() const841 shared_ptr<const CPSG_Request> GetRequest() const { return m_Request; } 842 843 /// Get the next item which has started arriving from the server. 844 /// @note 845 /// Some of the item's data may still be in transit or not even sent 846 /// in by the server yet. 847 /// @param deadline 848 /// Until what time to wait for the next item to start coming in. 849 /// @return 850 /// - The item objects from which you can start reading data 851 /// - If no more items expected in the reply, the returned item will have 852 /// type eEndOfReply 853 /// - On expired timeout, the returned pointer will be empty (nullptr) 854 /// @throw 855 /// If an error has been detected. 856 shared_ptr<CPSG_ReplyItem> GetNextItem(CDeadline deadline); 857 858 ~CPSG_Reply(); 859 860 private: 861 CPSG_Reply(); 862 863 struct SImpl; 864 unique_ptr<SImpl> m_Impl; 865 shared_ptr<const CPSG_Request> m_Request; 866 867 friend class CPSG_Queue; 868 }; 869 870 871 872 /// A queue to retrieve data (accession resolution info; bio-sequence; 873 /// annotation blobs) from the storage. 874 /// 875 /// Call SendRequest() to schedule retrievals (by their bio-ids or 876 /// blob-ids). Then, call GetNextReply() to get the next reply whose data 877 /// has started coming in. 878 /// 879 /// All methods are MT-safe. Data from different replies can be read in 880 /// parallel. 881 /// 882 /// The queue object can be used from more than one thread, either to push 883 /// requests or to get the incoming ready-to-be-retrieved replies. 884 /// 885 /// Results for the requests which were pushed into a given instance of 886 /// the queue will be available for retrieval using this (and only this) queue 887 /// instance regardless of which threads were used to push the request to the 888 /// queue. 889 /// 890 /// If more than one request was pushed into the queue, then the replies to all 891 /// of the requests may come, in any order. 892 /// 893 894 class CPSG_Queue 895 { 896 public: 897 /// Creates an uninitialized instance. 898 /// It allows to postpone queue initialization until later. 899 /// The uninitialized instances can then be initialized using 900 /// regular constructor and move assignment operator. 901 CPSG_Queue(); 902 903 /// @param service 904 /// Either a name of service (which can be resolved into a set of PSG 905 /// servers) or a single fixed PSG server (in format "host:port") 906 CPSG_Queue(const string& service); 907 ~CPSG_Queue(); 908 909 /// Push request into the queue. 910 /// @param request 911 /// The request (containing either bio- or blob-id to retrieve) to send. 912 /// @param deadline 913 /// For how long to try to push the request into the queue. 914 /// @return 915 /// - TRUE if it succeeds in pushing the request into the queue 916 /// - FALSE on timeout (ie. if cannot do it before the specified deadline) 917 /// @throw CPSG_Exception 918 /// If any (non-timeout) error condition occures. 919 /// @sa Get() 920 bool SendRequest(shared_ptr<CPSG_Request> request, 921 CDeadline deadline); 922 923 924 /// Get the next reply which has started arriving from the server. 925 /// @param deadline 926 /// Until what time to wait for the next reply to start coming in. 927 /// @return 928 /// - Reply object from which you can obtain particular items. 929 /// - On expired timeout, the returned pointer will be empty (nullptr). 930 /// @throw 931 /// If an error has been detected. 932 shared_ptr<CPSG_Reply> GetNextReply(CDeadline deadline); 933 934 935 /// Stop accepting new requests. 936 /// All already accepted requests will be processed as usual. 937 /// No requests are accepted after the stop. 938 void Stop(); 939 940 941 /// Stop accepting new requests and 942 /// cancel all requests whose replies have not been returned yet. 943 /// No requests are accepted and no replies are returned after the reset. 944 void Reset(); 945 946 947 /// Check whether the queue was stopped/reset and is now empty. 948 bool IsEmpty() const; 949 950 951 /// Check whether the queue has been initialized. IsInitialized() const952 bool IsInitialized() const { return static_cast<bool>(m_Impl); } 953 954 955 /// Is the queue in a state (possibly temporary) when requests get immediately rejected. 956 bool RejectsRequests() const; 957 958 959 /// Get an API lock. 960 /// Holding this API lock is essential if numerous short-lived queue instances are used. 961 /// It prevents an internal I/O implementation (threads, TCP connections, HTTP sessions, etc) 962 /// from being destroyed (on destroying last remaining queue instance) 963 /// and then re-created (with new queue instance). 964 using TApiLock = shared_ptr<void>; 965 static TApiLock GetApiLock(); 966 967 968 CPSG_Queue(CPSG_Queue&&); 969 CPSG_Queue& operator=(CPSG_Queue&&); 970 971 private: 972 struct SImpl; 973 unique_ptr<SImpl> m_Impl; 974 }; 975 976 977 DECLARE_SAFE_FLAGS(CPSG_Request_Resolve::EIncludeInfo); 978 979 END_NCBI_SCOPE 980 981 982 #endif /* HAVE_PSG_CLIENT */ 983 #endif /* OBJTOOLS__PUBSEQ_GATEWAY__PSG_CLIENT_HPP */ 984