1 /*  $Id: blast_types.hpp 573102 2018-10-23 12:07:38Z fongah2 $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Ilya Dondoshansky
27  *
28  */
29 
/** @file blast_types.hpp
 * Definitions of special types used in BLAST
 */
33 
34 #ifndef ALGO_BLAST_API___BLAST_TYPE__HPP
35 #define ALGO_BLAST_API___BLAST_TYPE__HPP
36 
37 #include <corelib/ncbistd.hpp>
38 #include <objects/seqalign/Seq_align_set.hpp>
39 #include <algo/blast/core/blast_export.h>
40 #include <algo/blast/core/blast_message.h>
41 #include <algo/blast/core/blast_def.h>
42 #include <algo/blast/core/blast_filter.h>
43 
44 BEGIN_NCBI_SCOPE
45 BEGIN_SCOPE(blast)
46 
47 /// This enumeration is to evolve into a task/program specific list that
48 /// specifies sets of default parameters to easily conduct searches using
49 /// BLAST.
50 /// @todo EProgram needs to be renamed to denote a task (similar to those
51 /// exposed by the BLAST web page) rather than a program type
52 /// N.B.: When making changes to this enumeration, please update
53 /// blast::ProgramNameToEnum (blast_aux.[ch]pp), blast::GetNumberOfFrames
54 /// (blast_setup_cxx.cpp) and BlastNumber2Program and BlastProgram2Number
55 /// (blast_util.c)
enum EProgram {
    eBlastNotSet = 0,  ///< No program has been selected yet
    eBlastn,           ///< Nucleotide vs. nucleotide (traditional blastn)
    eBlastp,           ///< Protein vs. protein
    eBlastx,           ///< Translated nucleotide vs. protein
    eTblastn,          ///< Protein vs. translated nucleotide
    eTblastx,          ///< Translated nucleotide vs. translated nucleotide
    eRPSBlast,         ///< Protein vs. PSSM (reverse-position-specific BLAST)
    eRPSTblastn,       ///< Nucleotide vs. PSSM (RPS BLAST, translated query)
    eMegablast,        ///< Nucleotide vs. nucleotide (traditional megablast)
    eDiscMegablast,    ///< Nucleotide vs. nucleotide, discontiguous megablast
    ePSIBlast,         ///< PSI-BLAST
    ePSITblastn,       ///< PSI-TBLASTN
    ePHIBlastp,        ///< PHI-BLAST, protein
    ePHIBlastn,        ///< PHI-BLAST, nucleotide
    eDeltaBlast,       ///< DELTA-BLAST
    eVecScreen,        ///< Vector screening
    eMapper,           ///< Jumper alignment for mapping
    eKBlastp,          ///< KMER screening followed by BLASTP
    eBlastProgramMax   ///< Undefined program; keep as the last enumerator
};
77 
78 /** Convert a EProgram enumeration value to a task name (as those used in the
79  * BLAST command line binaries)
80  * @param p EProgram enumeration value to convert [in]
81  */
82 NCBI_XBLAST_EXPORT
83 string EProgramToTaskName(EProgram p);
84 
85 /// Map a string into an element of the ncbi::blast::EProgram enumeration
86 /// (except eBlastProgramMax).
87 /// @param program_name [in]
88 /// @return an element of the ncbi::blast::EProgram enumeration, except
89 /// eBlastProgramMax
90 /// @throws CBlastException if the string does not map into any of the EProgram
91 /// elements
92 NCBI_XBLAST_EXPORT
93 EProgram ProgramNameToEnum(const std::string& program_name);
94 
95 /// Validates that the task provided is indeed a valid task, otherwise throws a
96 /// CBlastException
97 /// @param task task name to validate [in]
98 NCBI_XBLAST_EXPORT
99 void ThrowIfInvalidTask(const string& task);
100 
101 /// Convert EProgram to EBlastProgramType.
102 /// @param p Program expressed as an api layer EProgram.
103 /// @return Same program using the core enumeration.
104 NCBI_XBLAST_EXPORT
105 EBlastProgramType
106 EProgramToEBlastProgramType(EProgram p);
107 
108 /// Error or Warning Message from search.
109 ///
110 /// This class encapsulates a single error or warning message returned
111 /// from a search.  These include conditions detected by the algorithm
112 /// where no exception is thrown, but which impact the completeness or
113 /// accuracy of search results.  One example might be a completely
114 /// masked query.
115 
116 class CSearchMessage : public CObject {
117 public:
118     /// Construct a search message object.
119     /// @param severity The severity of this message. [in]
120     /// @param error_id A number unique to this error. [in]
121     /// @param message A description of the error for the user. [in]
CSearchMessage(EBlastSeverity severity,int error_id,const string & message)122     CSearchMessage(EBlastSeverity   severity,
123                    int              error_id,
124                    const string   & message)
125         : m_Severity(severity), m_ErrorId(error_id), m_Message(message)
126     {
127     }
128 
129     /// Construct an empty search message object.
CSearchMessage()130     CSearchMessage()
131         : m_Severity(EBlastSeverity(0)), m_ErrorId(0)
132     {
133     }
134 
135     /// Get the severity of this message.
136     /// @return The severity of this message.
GetSeverity() const137     EBlastSeverity GetSeverity() const
138     {
139         return m_Severity;
140     }
141 
142     /// Adjust the severity of this message.
143     /// @param sev The severity to assign. [in]
SetSeverity(EBlastSeverity sev)144     void SetSeverity(EBlastSeverity sev) { m_Severity = sev; }
145 
146     /// Get the severity of this message as a string.
147     /// @return A symbolic name for the severity level (such as "Warning").
GetSeverityString() const148     string GetSeverityString() const
149     {
150         return GetSeverityString(m_Severity);
151     }
152 
153     /// Get the symbolic name for a level of severity as a string.
154     /// @param severity The severity as an enumeration.
155     /// @return A symbolic name for the severity level (such as "Warning").
GetSeverityString(EBlastSeverity severity)156     static string GetSeverityString(EBlastSeverity severity)
157     {
158         switch(severity) {
159         case eBlastSevInfo:    return "Informational Message";
160         case eBlastSevWarning: return "Warning";
161         case eBlastSevError:   return "Error";
162         case eBlastSevFatal:   return "Fatal Error";
163         }
164         return "Message";
165     }
166 
167     /// Get the error identifier.
168     /// @return An identifier unique to this specific message.
GetErrorId() const169     int GetErrorId() const
170     {
171         return m_ErrorId;
172     }
173 
174     /// Set the error message.
175     /// @return A reference allowing the user to set the error string.
SetMessage(void)176     string& SetMessage(void) { return m_Message; }
177 
178     /// Get the error message.
179     /// @return A message describing this error or warning.
GetMessage(bool withSeverity=true) const180     string GetMessage(bool withSeverity = true) const
181     {
182     	if (withSeverity) {
183     		return GetSeverityString() + ": " + m_Message;
184     	}
185    		return m_Message;
186     }
187 
188     /// Compare two error messages for equality.
189     /// @return True if the messages are the same.
190     bool operator==(const CSearchMessage& rhs) const;
191 
192     /// Compare two error messages for inequality.
193     /// @return True if the messages are not the same.
194     bool operator!=(const CSearchMessage& rhs) const;
195 
196     /// Compare two error messages for order.
197     /// @return True if the first message is less than the second.
198     bool operator<(const CSearchMessage& rhs) const;
199 
200 private:
201     /// The severity of this error or warning message.
202     EBlastSeverity m_Severity;
203 
204     /// A unique identifier specifying what kind of error this is.
205     int            m_ErrorId;
206 
207     /// A message describing the error to the application user.
208     string         m_Message;
209 };
210 
211 /// Class for the messages for an individual query sequence.
212 class NCBI_XBLAST_EXPORT TQueryMessages : public vector< CRef<CSearchMessage> >
213 {
214 public:
215     /// Set the query id as a string.
216     /// @param id The query id.
217     void SetQueryId(const string& id);
218 
219     /// Get the query id as a string.
220     /// @return The query id.
221     string GetQueryId() const;
222 
223     /// Combine other messages with these.
224     /// @param other The second list of messages.
225     void Combine(const TQueryMessages& other);
226 
227 private:
228     /// The query identifier.
229     string m_IdString;
230 };
231 
/// Class for the messages for an entire BLAST search, which could
/// comprise multiple query sequences
234 class NCBI_XBLAST_EXPORT TSearchMessages : public vector<TQueryMessages>
235 {
236 public:
237     /// Add a message for all queries.
238     /// @param severity The severity of this message. [in]
239     /// @param error_id A number unique to this error. [in]
240     /// @param message A description of the error for the user. [in]
241     void AddMessageAllQueries(EBlastSeverity   severity,
242                               int              error_id,
243                               const string   & message);
244 
245     /// @return true if messages exist.
246     bool HasMessages() const;
247 
248     /// Converts messages to a string, which is returned.
249     /// @return A string containing all such messages.
250     string ToString() const;
251 
252     /// Combine another set of search messages with this one.
253     ///
254     /// Another set of messages is combined with these; each element
255     /// of the other set is combined with the element of this set
256     /// having the same index.  The size of both sets must match.
257     ///
258     /// @param other_msgs Other messages to add to these.
259     void Combine(const TSearchMessages& other_msgs);
260 
261     /// Find and remove redundant messages.
262     void RemoveDuplicates();
263 };
264 
265 /// Specifies the style of Seq-aligns that should be built from the
266 /// internal BLAST data structures
enum EResultType {
    /// Seq-aligns in the style of a database search
    eDatabaseSearch,
    /// Seq-aligns in the BLAST 2 Sequence style (one alignment per
    /// query-subject pair)
    eSequenceComparison
};
272 
273 /// Vector of Seq-align-sets
274 typedef vector< CRef<objects::CSeq_align_set> > TSeqAlignVector;
275 
276 inline bool
operator ==(const CSearchMessage & rhs) const277 CSearchMessage::operator==(const CSearchMessage& rhs) const
278 {
279     if (m_Severity == rhs.m_Severity &&
280         m_ErrorId  == rhs.m_ErrorId &&
281         m_Message  == rhs.m_Message) {
282         return true;
283     } else {
284         return false;
285     }
286 }
287 
288 inline bool
operator !=(const CSearchMessage & rhs) const289 CSearchMessage::operator!=(const CSearchMessage& rhs) const
290 {
291     return !(*this == rhs);
292 }
293 
294 inline bool
operator <(const CSearchMessage & rhs) const295 CSearchMessage::operator<(const CSearchMessage& rhs) const
296 {
297     if (m_ErrorId < rhs.m_ErrorId ||
298         m_Severity < rhs.m_Severity ||
299         m_Message < rhs.m_Message) {
300         return true;
301     } else {
302         return false;
303     }
304 }
305 
306 /// Wrapper for BlastSeqLoc structure.
307 class CBlastSeqLocWrap : public CObject
308 {
309     public:
310 
311         /// Instance constructor.
312         /// @param locs pointer to the object to hold
CBlastSeqLocWrap(BlastSeqLoc * locs)313         CBlastSeqLocWrap( BlastSeqLoc * locs ) : locs_( locs ) {}
314 
315         /// Instance destructor.
~CBlastSeqLocWrap()316         virtual ~CBlastSeqLocWrap() { BlastSeqLocFree( locs_ ); }
317 
318         /// Get access to the held object.
319         /// @return pointer storred by the wrapping object
getLocs() const320         BlastSeqLoc * getLocs() const { return locs_; }
321 
322     private:
323 
324         BlastSeqLoc * locs_;    ///< Wrapped pointer.
325 };
326 
327 END_SCOPE(blast)
328 END_NCBI_SCOPE
329 
330 #endif  /* ALGO_BLAST_API___BLAST_TYPE__HPP */
331