1 /* $Id: cuSimpleB2SWrapper.hpp 200108 2010-08-04 17:01:17Z lanczyck $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Chris Lanczycki 27 * 28 * File Description: 29 * 30 * Simplified API for a single blast-two-sequences call. 31 * Does not involve CDs, and NOT optimized (or intended) to be called 32 * in batch mode. If you need to make many calls, use CdBlaster! 33 * 34 * =========================================================================== 35 */ 36 37 #ifndef CU_SIMPLEB2SWRAPPER_HPP 38 #define CU_SIMPLEB2SWRAPPER_HPP 39 40 #include <vector> 41 #include <objmgr/object_manager.hpp> 42 #include <algo/blast/api/psibl2seq.hpp> 43 #include <algo/structure/cd_utils/cuMatrix.hpp> 44 #include <algo/structure/cd_utils/cuScoringMatrix.hpp> 45 46 BEGIN_NCBI_SCOPE 47 USING_SCOPE(blast); 48 BEGIN_SCOPE(cd_utils) 49 50 class NCBI_CDUTILS_EXPORT CSimpleB2SWrapper 51 { RemoveAllDataLoaders()52 static void RemoveAllDataLoaders() { 53 int i = 1; 54 CRef<CObjectManager> om = CObjectManager::GetInstance(); 55 CObjectManager::TRegisteredNames loader_names; 56 om->GetRegisteredNames(loader_names); 57 ITERATE(CObjectManager::TRegisteredNames, itr, loader_names) { 58 cout << "data loader " << i << ": " << *itr << endl; 59 om->RevokeDataLoader(*itr); 60 ++i; 61 } 62 } 63 public: 64 static const unsigned int HITLIST_SIZE_DEFAULT ; 65 static const unsigned int MAX_HITLIST_SIZE ; 66 static const Int8 CDD_DATABASE_SIZE ; 67 static const double E_VAL_DEFAULT ; // default e-value threshold 68 static const double E_VAL_WHEN_NO_SEQ_ALIGN ; // eval when Blast doesn't return a seq-align 69 static const double SCORE_WHEN_NO_SEQ_ALIGN ; 70 static const string SCORING_MATRIX_DEFAULT ; 71 static const ECompoAdjustModes COMPOSITION_ADJ_DEF; 72 static const double DO_NOT_USE_PERC_ID_THRESHOLD ; // user must provide this; no default is set 73 static const Int8 DO_NOT_USE_EFF_SEARCH_SPACE ; // user must provide this; no default is set 74 75 // If the default for 'percIdThold' is used, then the %identity filter will be off in B2S. 76 CSimpleB2SWrapper(double percIdThold = DO_NOT_USE_PERC_ID_THRESHOLD, string matrixName = SCORING_MATRIX_DEFAULT); 77 78 // Searches using the full-length sequences specified. 79 // If the default for 'percIdThold' is used, then the %identity filter will be off in B2S. 80 CSimpleB2SWrapper(CRef<CBioseq>& seq1, CRef<CBioseq>& seq2, double percIdThold = DO_NOT_USE_PERC_ID_THRESHOLD, string matrixName = SCORING_MATRIX_DEFAULT); 81 82 // If from = to = 0, or from > to, use the full length. 83 // From/to are not validated vs. sequence data in 'seq'. SetSeq1(CRef<CBioseq> & seq,unsigned int from=0,unsigned int to=0)84 void SetSeq1(CRef<CBioseq>& seq, unsigned int from = 0, unsigned int to = 0) { SetSeq(seq, true, from, to);} SetSeq2(CRef<CBioseq> & seq,unsigned int from=0,unsigned int to=0)85 void SetSeq2(CRef<CBioseq>& seq, unsigned int from = 0, unsigned int to = 0) { SetSeq(seq, false, from, to);} 86 87 // GENERAL NOTE on Set....() functions: 88 // the return value is the value of the corresponding member variable on completion 89 // (which is also the corresponding value in m_options when m_options is valid). 90 // Invalid values used in these functions are ignored and no changes are made 91 // to the settings, and return value will therefore differ from the passed argument. 92 93 // Must be between 0 and 100; otherwise no change is made. 94 // NOTE: this sets the %identity filter in the b2s algorithm, which seems to calculate %identity using 95 // the smaller of the two sequences. 96 double SetPercIdThreshold(double percIdThold); 97 98 // Must be between 1 and MAX_HITLIST_SIZE, otherwise change is not made. 99 unsigned int SetHitlistSize(unsigned int hitlistSize); 100 101 // Must be > 0, otherwise change is not made. 102 Int8 SetDbLength(Int8 dbLength); 103 104 // Must be > 0, otherwise change is not made. 105 Int8 SetEffSearchSpace(Int8 effSearchSpace); 106 107 // Must be > 0, otherwise change is not made. 108 ECompoAdjustModes SetCompoAdjustMode(ECompoAdjustModes caMode); 109 110 // Sanity checks that eValueThold is non-negative. 111 double SetEValueThreshold(double eValueThold); 112 113 // Set matrix name; sanity checks that matrixName is one of those defined in cuScoringMatrix.hpp 114 string SetMatrixName(string matrixName); 115 116 // Expose the options handle object so any parameter can be manipulated. GetOptionsHandle()117 CRef< CBlastAdvancedProteinOptionsHandle >& GetOptionsHandle() { return m_options; } 118 119 // Do all parameter configurations before calling this method. 120 // E-value threshold is 10.0 unless user has previously called 'SetEValueThreshold'. 121 // Uses Object Manager enabled Blast interface. 122 bool DoBlast2Seqs(); 123 124 // Do all parameter configurations before calling this method. 125 // E-value threshold is 10.0 unless user has previously called 'SetEValueThreshold'. 126 // Uses Object Manager free Blast interface. 127 // bool DoBlast2Seqs_OMFree(); 128 129 // If there are no hits, the returned CRef will be invalid. Test the CRef before using. 130 CRef<CSeq_align> getBestB2SAlignment(double* score = NULL, double* eval = NULL, double* percIdent = NULL) const; 131 getAllHits() const132 const vector<CRef<CSeq_align> >& getAllHits() const {return m_alignments;} getNumHits() const133 unsigned int getNumHits() const {return m_alignments.size();} 134 135 // All hitIndex values are zero-based! 136 bool getPairwiseBlastAlignment(unsigned int hitIndex, CRef< CSeq_align >& seqAlign) const; 137 double getPairwiseScore(unsigned int hitIndex) const; 138 double getPairwiseEValue(unsigned int hitIndex) const; 139 double getPairwisePercIdent(unsigned int hitIndex) const; // %id vs. m_seq1!!! 140 141 private: 142 // long m_dbSize; 143 // int m_dbSeqNum; 144 145 struct SB2SSeq { 146 bool useFull; 147 unsigned int from; 148 unsigned int to; 149 CRef<CBioseq> bs; 150 }; 151 152 SB2SSeq m_seq1; 153 SB2SSeq m_seq2; 154 155 string m_scoringMatrix; 156 unsigned int m_hitlistSize; 157 Int8 m_dbLength; 158 double m_eValueThold; 159 double m_percIdThold; // only used if explicitly set by user. 160 Int8 m_effSearchSpace; // only used if explicitly set by user. 161 ECompoAdjustModes m_caMode; 162 vector< CRef<CSeq_align> > m_alignments; 163 164 vector< double > m_scores; 165 vector< double > m_evals; 166 vector< double > m_percIdents; 167 168 // Stores all options needed for the query. 169 CRef<CBlastAdvancedProteinOptionsHandle> m_options; 170 171 // Create the options object and set defaults. 172 void InitializeToDefaults(); 173 174 void SetSeq(CRef<CBioseq>& seq, bool isSeq1, unsigned int from, unsigned int to); 175 176 // False if there was a problem (e.g., SB2SSeq couldn't provide a Seq-id). 177 bool FillOutSeqLoc(const SB2SSeq& s, CSeq_loc& seqLoc); 178 179 void processBlastHits(ncbi::blast::CSearchResults&); 180 // void processBlastHits_OMFree(ncbi::blast::CSearchResults&); 181 }; 182 183 END_SCOPE(cd_utils) 184 END_NCBI_SCOPE 185 186 #endif // CU_SIMPLEB2SWRAPPER_HPP 187