1 #ifndef ALGO_BLAST_API___REMOTE_SERVICES__HPP 2 #define ALGO_BLAST_API___REMOTE_SERVICES__HPP 3 4 /* $Id: blast_services.hpp 575325 2018-11-27 18:22:00Z ucko $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Authors: Christiam Camacho, Kevin Bealer 30 * 31 */ 32 33 /// @file blast_services.hpp 34 /// Declares the CBlastServices class. 35 36 #include <corelib/ncbistd.hpp> 37 #include <corelib/ncbiobj.hpp> 38 #include <objects/seqloc/Seq_interval.hpp> 39 #include <objects/blast/blast__.hpp> 40 #include <objects/blast/names.hpp> 41 #include <objects/scoremat/PssmWithParameters.hpp> 42 43 /** @addtogroup AlgoBlast 44 * 45 * @{ 46 */ 47 48 BEGIN_NCBI_SCOPE 49 50 BEGIN_SCOPE(objects) 51 /// forward declaration of ASN.1 object containing PSSM (scoremat.asn) 52 class CBioseq_set; 53 class CSeq_loc; 54 class CSeq_id; 55 class CSeq_align_set; 56 END_SCOPE(objects) 57 58 using namespace ncbi::objects; 59 60 #ifndef NCBI_MODULE 61 #define NCBI_MODULE NETBLAST 62 #endif 63 64 /// RemoteServicesException 65 /// 66 67 class NCBI_XOBJREAD_EXPORT CBlastServicesException : public CException { 68 public: 69 /// Errors are classified into one of two types. 70 enum EErrCode { 71 /// Argument validation failed. 72 eArgErr, 73 74 /// Files were missing or contents were incorrect. 75 eFileErr, 76 77 /// Request failed 78 eRequestErr, 79 80 /// Memory allocation failed. 81 eMemErr 82 }; 83 84 /// Get a message describing the situation leading to the throw. GetErrCodeString() const85 virtual const char* GetErrCodeString() const override 86 { 87 switch ( GetErrCode() ) { 88 case eArgErr: return "eArgErr"; 89 case eFileErr: return "eFileErr"; 90 case eRequestErr: return "eRequestErr"; 91 default: return CException::GetErrCodeString(); 92 } 93 } 94 95 /// Include standard NCBI exception behavior. 96 NCBI_EXCEPTION_DEFAULT(CBlastServicesException, CException); 97 }; 98 99 100 101 /// API for Remote Blast Services 102 /// 103 /// Class to obtain information and data from the Remote BLAST service that is 104 /// not associated with a specific BLAST search 105 106 class NCBI_XOBJREAD_EXPORT CBlastServices : public CObject 107 { 108 public: 109 /// Default constructor CBlastServices()110 CBlastServices() { m_Verbose = false; } 111 112 /// Analogous to CRemoteBlast::SetVerbose SetVerbose(bool value=true)113 void SetVerbose(bool value = true) { m_Verbose = value; } 114 115 /// Returns true if the BLAST database specified exists in the NCBI servers 116 /// @param dbname BLAST database name [in] 117 /// @param is_protein is this a protein database? [in] 118 bool IsValidBlastDb(const string& dbname, bool is_protein); 119 120 /// Retrieve detailed information for one BLAST database 121 /// If information about multiple databases is needed, use 122 /// the other GetDatabaseInfo method. 123 /// 124 /// @param blastdb object describing the database for which to get 125 /// detailed information 126 /// @return Detailed information for the requested BLAST database or an 127 /// empty object is the requested database wasn't found 128 CRef<objects::CBlast4_database_info> 129 GetDatabaseInfo(CRef<objects::CBlast4_database> blastdb); 130 131 /// Retrieve detailed information for databases listed 132 /// in the string. If more than one database is supplied, it 133 /// they should be separated by spaces (e.g., "nt wgs est"). 134 /// 135 /// @param dbname string listing the database(s) 136 /// @param is_protein is a protein for true, otherwise dna 137 /// @param found_all true if all databases were found. 138 /// @param missing_names pointer to an array with missing database(s) 139 /// @return Detailed information for the requested BLAST databases or an 140 /// empty vector if no databases were found. 141 vector< CRef<objects::CBlast4_database_info> > 142 GetDatabaseInfo(const string& dbname, bool is_protein, bool *found_all, 143 vector<string> *missing_names = NULL); 144 /// Same as GetDatabaseInfo but retrieving whole list of database 145 vector< CRef<objects::CBlast4_database_info> > 146 GetDatabaseInfoLegacy(const string& dbname, bool is_protein, bool *found_all, 147 vector<string> *missing_names = NULL); 148 /// Retrieve organism specific repeats databases 149 vector< CRef<objects::CBlast4_database_info> > 150 GetOrganismSpecificRepeatsDatabases(); 151 152 /// Retrieve a list of NCBI taxonomy IDs for which there exists 153 /// windowmasker masking data to support an alternative organism specific 154 /// filtering 155 objects::CBlast4_get_windowmasked_taxids_reply::Tdata 156 GetTaxIdWithWindowMaskerSupport(); 157 158 /// Defines a std::vector of CRef<CSeq_id> 159 typedef vector< CRef<objects::CSeq_id> > TSeqIdVector; 160 /// Defines a std::vector of CRef<CBioseq> 161 typedef vector< CRef<objects::CBioseq> > TBioseqVector; 162 163 /// Get a set of Bioseqs without their sequence data given an input set of 164 /// Seq-ids. 165 /// 166 /// @param seqids A vector of Seq-ids for which Bioseqs are requested. 167 /// @param database A list of databases from which to get the sequences. 168 /// @param seqtype The residue type, 'p' from protein, 'n' for nucleotide. 169 /// @param bioseqs The vector used to return the requested Bioseqs. 170 /// @param errors A null-separated list of errors. 171 /// @param warnings A null-separated list of warnings. 172 /// @param verbose Produce verbose output. [in] 173 /// @param target_only Filter the defline to include only the requested id. [in] 174 /// @todo FIXME: Add retry logic in case of transient errors 175 static void 176 GetSequencesInfo(TSeqIdVector& seqids, // in 177 const string& database, // in 178 char seqtype, // 'p' or 'n' 179 TBioseqVector& bioseqs, // out 180 string& errors, // out 181 string& warnings, // out 182 bool verbose = false, // in 183 bool target_only = false); // in 184 185 /// Get a set of Bioseqs given an input set of Seq-ids. 186 /// 187 /// This retrieves the Bioseqs corresponding to the given Seq-ids 188 /// from the blast4 server. Normally this will be much faster 189 /// than consulting ID1 seperately for each sequence. Sometimes 190 /// there are multiple sequences for a given Seq-id. In such 191 /// cases, there are always 'non-ambiguous' ids available. This 192 /// interface does not currently address this issue, and will 193 /// simply return the Bioseqs corresponding to one of the 194 /// sequences. Errors will be returned if the operation cannot be 195 /// completed (or started). In the case of a sequence that cannot 196 /// be found, the error will indicate the index of (and Seq-id of) 197 /// the missing sequence; processing will continue, and the 198 /// sequences that can be found will be returned along with the 199 /// error. 200 /// 201 /// @param seqids A vector of Seq-ids for which Bioseqs are requested. 202 /// @param database A list of databases from which to get the sequences. 203 /// @param seqtype The residue type, 'p' from protein, 'n' for nucleotide. 204 /// @param bioseqs The vector used to return the requested Bioseqs. 205 /// @param errors A null-separated list of errors. 206 /// @param warnings A null-separated list of warnings. 207 /// @param verbose Produce verbose output. [in] 208 /// @param target_only Filter the defline to include only the requested id. [in] 209 /// @todo FIXME: Add retry logic in case of transient errors 210 static void 211 GetSequences(TSeqIdVector& seqids, // in 212 const string& database, // in 213 char seqtype, // 'p' or 'n' 214 TBioseqVector& bioseqs, // out 215 string& errors, // out 216 string& warnings, // out 217 bool verbose = false, // in 218 bool target_only = false); // in 219 /// Defines a std::vector of CRef<CSeq_interval> 220 typedef vector< CRef<objects::CSeq_interval> > TSeqIntervalVector; 221 /// Defines a std::vector of CRef<CSeq_data> 222 typedef vector< CRef<objects::CSeq_data> > TSeqDataVector; 223 224 /// This retrieves (partial) sequence data from the remote BLAST server. 225 /// 226 /// @param seqid 227 /// A vector of Seq-ids for which sequence data are requested. [in] 228 /// @param database 229 /// A list of databases from which to get the sequences. [in] 230 /// @param seqtype 231 /// The residue type, 'p' from protein, 'n' for nucleotide. [in] 232 /// @param ids 233 /// The sequence IDs for those sequences which the seq data was 234 // obtained successfully [out] 235 /// @param seq_data 236 /// Sequence data in CSeq_data format. [out] 237 /// @param errors 238 /// An error message (if any). [out] 239 /// @param warnings 240 /// A warning (if any). [out] 241 /// @param verbose 242 /// Produce verbose output. [in] 243 /// @todo FIXME: Add retry logic in case of transient errors 244 static void 245 GetSequenceParts(const TSeqIntervalVector & seqids, // in 246 const string & database, // in 247 char seqtype, // 'p' or 'n' 248 TSeqIdVector & ids, // out 249 TSeqDataVector & seq_data, // out 250 string & errors, // out 251 string & warnings, // out 252 bool verbose = false);// in 253 254 private: 255 256 /// Retrieve the BLAST databases available for searching 257 void x_GetAvailableDatabases(); 258 259 /// Look for a database matching this method's argument and returned 260 /// detailed information about it. 261 /// @param blastdb database description 262 /// @return detailed information about the database requested or an empty 263 /// CRef<> if the database was not found 264 CRef<objects::CBlast4_database_info> 265 x_FindDbInfoFromAvailableDatabases(CRef<objects::CBlast4_database> blastdb); 266 267 /// Prohibit copy construction. 268 CBlastServices(const CBlastServices &); 269 270 /// Prohibit assignment. 271 CBlastServices & operator=(const CBlastServices &); 272 273 274 // Data 275 276 /// BLAST databases available to search 277 objects::CBlast4_get_databases_reply::Tdata m_AvailableDatabases; 278 /// Taxonomy IDs for which there's windowmasker masking data at NCBI 279 objects::CBlast4_get_windowmasked_taxids_reply::Tdata m_WindowMaskedTaxIds; 280 /// Display verbose output to stdout? 281 bool m_Verbose; 282 }; 283 284 #undef NCBI_MODULE 285 286 END_NCBI_SCOPE 287 288 /* @} */ 289 290 #endif /* ALGO_BLAST_API___REMOTE_SERVICES__HPP */ 291