1 /* $Id: blast_results.hpp 542379 2017-07-31 13:06:45Z dicuccio $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Jason Papadopoulos 27 * 28 */ 29 30 /** @file blast_results.hpp 31 * Definition of classes which constitute the results of running a BLAST 32 * search 33 */ 34 35 #ifndef ALGO_BLAST_API___BLAST_RESULTS_HPP 36 #define ALGO_BLAST_API___BLAST_RESULTS_HPP 37 38 #include <algo/blast/core/blast_stat.h> 39 #include <algo/blast/api/blast_aux.hpp> 40 41 /** @addtogroup AlgoBlast 42 * 43 * @{ 44 */ 45 46 BEGIN_NCBI_SCOPE 47 BEGIN_SCOPE(blast) 48 49 /// Class used to return ancillary data from a blast search, 50 /// i.e. information that is not the list of alignment found 51 class NCBI_XBLAST_EXPORT CBlastAncillaryData : public CObject 52 { 53 54 public: 55 56 /// constructor 57 /// @param program_type Type of blast search [in] 58 /// @param query_number The index of the query for which 59 /// information will be retrieved [in] 60 /// @param sbp Score block, containing Karlin parameters [in] 61 /// @param query_info Structure with per-context information [in] 62 CBlastAncillaryData(EBlastProgramType program_type, 63 int query_number, 64 const BlastScoreBlk *sbp, 65 const BlastQueryInfo *query_info); 66 67 /** Parametrized constructor taking pairs of values for ungapped and gapped 68 * Karlin-Altschul parameters as well as the effective search space 69 * @param lambda Pair of ungapped and gapped lambda (in that order) [in] 70 * @param k Pair of ungapped and gapped k (in that order) [in] 71 * @param h Pair of ungapped and gapped h (in that order) [in] 72 * @param effective_search_space effective search space [in] 73 * @param is_psiblast true if the statistical parameters are for PSI-BLAST 74 * [in] 75 */ 76 CBlastAncillaryData(pair<double, double> lambda, 77 pair<double, double> k, 78 pair<double, double> h, 79 Int8 effective_search_space, 80 bool is_psiblast = false); 81 82 /// Destructor 83 ~CBlastAncillaryData(); 84 85 /// Copy-constructor 86 CBlastAncillaryData(const CBlastAncillaryData& rhs); 87 88 /// Assignment operator 89 CBlastAncillaryData& operator=(const CBlastAncillaryData& rhs); 90 91 /// Retrieve gumbel parameters GetGumbelBlk() const92 const Blast_GumbelBlk * GetGumbelBlk() const { 93 return m_GumbelBlk; 94 } 95 96 /// Retrieve ungapped Karlin parameters GetUngappedKarlinBlk() const97 const Blast_KarlinBlk * GetUngappedKarlinBlk() const { 98 return m_UngappedKarlinBlk; 99 } 100 101 /// Retrieve gapped Karlin parameters GetGappedKarlinBlk() const102 const Blast_KarlinBlk * GetGappedKarlinBlk() const { 103 return m_GappedKarlinBlk; 104 } 105 106 /// Retrieve PSI-BLAST ungapped Karlin parameters GetPsiUngappedKarlinBlk() const107 const Blast_KarlinBlk * GetPsiUngappedKarlinBlk() const { 108 return m_PsiUngappedKarlinBlk; 109 } 110 111 /// Retrieve PSI-BLAST gapped Karlin parameters GetPsiGappedKarlinBlk() const112 const Blast_KarlinBlk * GetPsiGappedKarlinBlk() const { 113 return m_PsiGappedKarlinBlk; 114 } 115 /// Retrieve the search space for this query sequence. If the 116 /// results correspond to a blastx search, the search space will 117 /// refer to protein letters GetSearchSpace() const118 Int8 GetSearchSpace() const { 119 return m_SearchSpace; 120 } 121 /// Retrieve the length adjustment for boundary conditions GetLengthAdjustment() const122 Int8 GetLengthAdjustment() const { 123 return m_LengthAdjustment; 124 } 125 /// Set the length adjustment for boundary conditions SetLengthAdjustment(int len_adj)126 void SetLengthAdjustment(int len_adj){ 127 m_LengthAdjustment = len_adj; 128 } 129 private: 130 /// Gumbel parameters for one query 131 Blast_GumbelBlk *m_GumbelBlk; 132 133 /// Ungapped Karlin parameters for one query 134 Blast_KarlinBlk *m_UngappedKarlinBlk; 135 136 /// Gapped Karlin parameters for one query 137 Blast_KarlinBlk *m_GappedKarlinBlk; 138 139 /// PSI-BLAST ungapped Karlin parameters for one query (if applicable) 140 Blast_KarlinBlk *m_PsiUngappedKarlinBlk; 141 142 /// PSI-BLAST gapped Karlin parameters for one query (if applicable) 143 Blast_KarlinBlk *m_PsiGappedKarlinBlk; 144 145 /// Search space used when calculating e-values for one query 146 147 Int8 m_SearchSpace; 148 149 /// Length adjustment for boundary conditions 150 Int8 m_LengthAdjustment; 151 152 /// Workhorse for copy constructor and assignment operator 153 /// @param other object to copy [in] 154 void do_copy(const CBlastAncillaryData& other); 155 }; 156 157 158 /// Search Results for One Query. 159 /// 160 /// This class encapsulates all the search results and related data 161 /// corresponding to one of the input queries. 162 163 class NCBI_XBLAST_EXPORT CSearchResults : public CObject { 164 public: 165 166 /// Constructor 167 /// @param query List of query identifiers [in] 168 /// @param align alignments for a single query sequence [in] 169 /// @param errs error messages for this query sequence [in] 170 /// @param ancillary_data Miscellaneous output from the blast engine [in] 171 /// @param query_masks Mask locations for this query [in] 172 /// @param rid RID (if applicable, else empty string) [in] 173 CSearchResults(CConstRef<objects::CSeq_id> query, 174 CRef<objects::CSeq_align_set> align, 175 const TQueryMessages & errs, 176 CRef<CBlastAncillaryData> ancillary_data, 177 const TMaskedQueryRegions * query_masks = NULL, 178 const string & rid = kEmptyStr, 179 const SPHIQueryInfo * phi_query_info = NULL); 180 181 /// Our destructor 182 ~CSearchResults(); 183 184 /// Sets the RID for these results 185 /// @param rid RID to set [in] SetRID(const string & rid)186 void SetRID(const string& rid) { m_RID.assign(rid); } 187 188 /// Returns the RID for these results (if applicable), otherwise returns an 189 /// empty string GetRID() const190 string GetRID() const { return m_RID; } 191 192 /// Accessor for the Seq-align results GetSeqAlign() const193 CConstRef<objects::CSeq_align_set> GetSeqAlign() const 194 { 195 return m_Alignment; 196 } 197 198 /// Only intended to be used if you need to edit the seqlign. Otherwise 199 ///use GetSeqAlign() SetSeqAlign()200 CRef<objects::CSeq_align_set> SetSeqAlign() 201 { 202 return m_Alignment; 203 } 204 205 /// Return true if there are any alignments for this query 206 bool HasAlignments() const; 207 208 /// Accessor for the query's sequence identifier 209 CConstRef<objects::CSeq_id> GetSeqId() const; 210 211 /// Accessor for the query's search ancillary GetAncillaryData() const212 CRef<CBlastAncillaryData> GetAncillaryData() const 213 { 214 return m_AncillaryData; 215 } 216 217 /// Accessor for the error/warning messsages for this query 218 /// @param min_severity minimum severity to report errors [in] 219 TQueryMessages GetErrors(int min_severity = eBlastSevError) const; 220 221 /// Returns true if there are errors among the results for this object 222 bool HasErrors() const; 223 /// Returns true if there are warnings among the results for this object 224 bool HasWarnings() const; 225 226 /// Retrieve a string with the query identifier followed by the errors 227 /// produced, returns a empty string if HasErrors() returns false. 228 string GetErrorStrings() const; 229 /// Retrieve a string with the query identifier followed by the warnings 230 /// produced, returns a empty string if HasWarnings() returns false. 231 string GetWarningStrings() const; 232 233 /// Retrieve the query regions which were masked by BLAST 234 /// @param flt_query_regions the return value [in|out] 235 void GetMaskedQueryRegions(TMaskedQueryRegions& flt_query_regions) const; 236 237 /// Mutator for the masked query regions, intended to be used by internal 238 /// BLAST APIs to populate this object 239 /// @param flt_query_regions the input value [in] 240 void SetMaskedQueryRegions(const TMaskedQueryRegions& flt_query_regions); 241 242 /// Retrieve the masked locations for the subject sequences in the 243 /// contained alignment 244 /// @param subj_masks masked locations [out] 245 void GetSubjectMasks(TSeqLocInfoVector& subj_masks) const; 246 247 /// Set the masked locations for the subject sequences in the 248 /// contained alignment 249 /// @param subj_masks masked locations [in] 250 void SetSubjectMasks(const TSeqLocInfoVector& subj_masks); 251 252 /// Retrieves PHI-BLAST information about pattern on query. GetPhiQueryInfo() const253 const SPHIQueryInfo * GetPhiQueryInfo() const { 254 return m_PhiQueryInfo; 255 } 256 257 /// Trim align_set size 258 /// @parm size max num of alignments to keep 259 /// (0 will erase all) 260 void TrimSeqAlign(objects::CSeq_align_set::Tdata::size_type max_size); 261 262 protected: 263 /// this query's id 264 CConstRef<objects::CSeq_id> m_QueryId; 265 266 /// alignments for this query 267 CRef<objects::CSeq_align_set> m_Alignment; 268 269 /// error/warning messages for this query 270 TQueryMessages m_Errors; 271 272 /// this query's masked regions 273 TMaskedQueryRegions m_Masks; 274 275 /// the matching subjects masks 276 TSeqLocInfoVector m_SubjectMasks; 277 278 /// non-alignment ancillary data for this query 279 CRef<CBlastAncillaryData> m_AncillaryData; 280 281 /// The RID, if applicable (otherwise it's empty) 282 string m_RID; 283 284 /// PHI-BLAST information. 285 SPHIQueryInfo *m_PhiQueryInfo; 286 287 private: 288 /// Prohibit copy constructor 289 CSearchResults(const CSearchResults& rhs); 290 /// Prohibit assignment operator 291 CSearchResults& operator=(const CSearchResults& rhs); 292 }; 293 294 295 /// Search Results for All Queries. 296 /// 297 /// This class encapsulates all of the search results and related data 298 /// from a search, it supports BLAST database and Bl2Seq searches and provides 299 /// a convenient way of accessing the results from BLAST. 300 /// 301 /// @note When representing BLAST database results, there are 302 /// CSearchResultSet::NumQueries() objects of type CSearchResultSet::value_type 303 /// in this object. When representing Bl2Seq results, there are 304 /// (CSearchResultSet::NumQueries() * number of subjects) objects of type 305 /// CSearchResultSet::value_type in this object. 306 307 class NCBI_XBLAST_EXPORT CSearchResultSet : public CObject { 308 public: 309 /// data type contained by this container 310 typedef CRef<CSearchResults> value_type; 311 312 /// List of query ids. 313 typedef vector< CConstRef<objects::CSeq_id> > TQueryIdVector; 314 315 /// size_type type definition 316 typedef vector<value_type>::size_type size_type; 317 318 /// typedef for a vector of CRef<CBlastAncillaryData> 319 typedef vector< CRef<CBlastAncillaryData> > TAncillaryVector; 320 321 /// const_iterator type definition 322 typedef vector<value_type>::const_iterator const_iterator; 323 324 /// iterator type definition 325 typedef vector<value_type>::iterator iterator; 326 327 /// Simplest constructor 328 CSearchResultSet(EResultType res_type = eDatabaseSearch); 329 330 /// Parametrized constructor 331 /// @param aligns vector of all queries' alignments [in] 332 /// @param msg_vec vector of all queries' messages [in] 333 /// @param res_type result type stored in this object [in] 334 CSearchResultSet(TSeqAlignVector aligns, 335 TSearchMessages msg_vec, 336 EResultType res_type = eDatabaseSearch); 337 338 /// Parametrized constructor 339 /// @param ids vector of all queries' ids [in] 340 /// @param aligns vector of all queries' alignments [in] 341 /// @param msg_vec vector of all queries' messages [in] 342 /// @param ancillary_data vector of per-query search ancillary data [in] 343 /// @param masks Mask locations for this query [in] 344 /// @param res_type result type stored in this object [in] 345 /// @note this constructor assumes that the ids, msg_vec, and 346 /// ancillary_data vectors are of the SAME size as the aligns vector. The 347 /// masks vector can be of the same size as aligns or have as many elements 348 /// as there were queries in the search and they will be adjusted as 349 /// necessary. 350 CSearchResultSet(TQueryIdVector ids, 351 TSeqAlignVector aligns, 352 TSearchMessages msg_vec, 353 TAncillaryVector ancillary_data = 354 TAncillaryVector(), 355 const TSeqLocInfoVector* masks = NULL, 356 EResultType res_type = eDatabaseSearch, 357 const SPHIQueryInfo* phi_query_info = NULL); 358 359 /// Allow array-like access with integer indices to CSearchResults 360 /// contained by this object 361 /// @param i query sequence index if result type is eDatabaseSearch, 362 /// otherwise it's the query-subject index [in] operator [](size_type i)363 CSearchResults & operator[](size_type i) { 364 return *m_Results[i]; 365 } 366 367 /// Allow array-like access with integer indices to const CSearchResults 368 /// contained by this object 369 /// @param i query sequence index if result type is eDatabaseSearch, 370 /// otherwise it's the query-subject index [in] operator [](size_type i) const371 const CSearchResults & operator[](size_type i) const { 372 return *m_Results[i]; 373 } 374 375 /// Retrieve results for a query-subject pair 376 /// contained by this object 377 /// @param qi query sequence index [in] 378 /// @param si subject sequence index [in] 379 /// @note it only works for results of type eSequenceComparison 380 CSearchResults & GetResults(size_type qi, size_type si); 381 382 /// Retrieve results for a query-subject pair 383 /// @param qi query sequence index [in] 384 /// @param si subject sequence index [in] 385 /// @note it only works for results of type eSequenceComparison 386 const CSearchResults & GetResults(size_type qi, size_type si) const; 387 388 /// Allow array-like access with CSeq_id indices to CSearchResults 389 /// contained by this object 390 /// @param ident query sequence identifier [in] 391 /// @note it only works for results of type eDatabaseSearch 392 CRef<CSearchResults> operator[](const objects::CSeq_id & ident); 393 394 /// Allow array-like access with CSeq_id indices to const CSearchResults 395 /// contained by this object 396 /// @param ident query sequence identifier [in] 397 /// @note it only works for results of type eDatabaseSearch 398 CConstRef<CSearchResults> operator[](const objects::CSeq_id & ident) const; 399 400 /// Return the number of results contained by this object 401 /// @note this returns the number of queries for results of type 402 /// eDatabaseSearch and (number of queries * number of subjects) for results 403 /// of type eSequenceComparison GetNumResults() const404 size_type GetNumResults() const 405 { 406 return m_Results.size(); 407 } 408 409 /// Return the number of unique query ID's represented by this object GetNumQueries() const410 size_type GetNumQueries() const 411 { 412 return m_NumQueries; 413 } 414 415 /// Sets the filtered query regions. If results are of type 416 /// eSequenceComparison, the masks can be one for each query and they will 417 /// be duplicated as necessary to meet this class' pre-conditions. 418 void SetFilteredQueryRegions(const TSeqLocInfoVector& masks); 419 /// Retrieves the filtered query regions 420 TSeqLocInfoVector GetFilteredQueryRegions() const; 421 422 /// Identical to GetNumResults, provided to facilitate STL-style iteration 423 /// @sa note in GetNumResults size() const424 size_type size() const { return GetNumResults(); } 425 426 /// Returns whether this container is empty or not. empty() const427 bool empty() const { return size() == 0; } 428 429 /// Returns const_iterator to beginning of container, provided to 430 /// facilitate STL-style iteration begin() const431 const_iterator begin() const { return m_Results.begin(); } 432 433 /// Returns const_iterator to end of container, provided to 434 /// facilitate STL-style iteration end() const435 const_iterator end() const { return m_Results.end(); } 436 437 /// Returns iterator to beginning of container, provided to 438 /// facilitate STL-style iteration begin()439 iterator begin() { return m_Results.begin(); } 440 441 /// Returns iterator to end of container, provided to 442 /// facilitate STL-style iteration end()443 iterator end() { return m_Results.end(); } 444 445 /// Clears the contents of this object clear()446 void clear() { 447 m_NumQueries = 0; 448 m_Results.clear(); 449 } 450 451 /// Add a value to the back of this container 452 /// @param element element to add [in] 453 void push_back(value_type& element); 454 455 /// Get the type of results contained in this object GetResultType() const456 EResultType GetResultType() const { return m_ResultType; } 457 458 /// Sets the RID for these results 459 /// @param rid RID to set [in] 460 void SetRID(const string& rid); 461 462 private: 463 /// Initialize the result set. 464 void x_Init(TQueryIdVector& queries, 465 TSeqAlignVector aligns, 466 TSearchMessages msg_vec, 467 TAncillaryVector ancillary_data, 468 const TSeqLocInfoVector* query_masks, 469 const SPHIQueryInfo* phi_query_info = NULL); 470 471 /// Type of results stored in this object 472 EResultType m_ResultType; 473 474 /// Number of queries 475 size_type m_NumQueries; 476 477 /// Vector of results. 478 vector< CRef<CSearchResults> > m_Results; 479 480 /// True if this object contains PHI-BLAST results 481 bool m_IsPhiBlast; 482 483 /// Stores the masked query regions, for convenience and usage in CBl2Seq 484 TSeqLocInfoVector m_QueryMasks; 485 }; 486 487 END_SCOPE(blast) 488 END_NCBI_SCOPE 489 490 /* @} */ 491 492 #endif /* ALGO_BLAST_API___BLAST_RESULTS_HPP */ 493