1 /* $Id: blast_seq.h,v 1.29 2006/11/21 17:21:20 papadopo Exp $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's offical duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 *  Author: Ilya Dondoshansky
25 * ===========================================================================*/
26 
27 /** @file blast_seq.h
28  * Functions converting from SeqLocs to structures used in BLAST and back.
29  */
30 
31 #ifndef __BLAST_SEQ__
32 #define __BLAST_SEQ__
33 
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37 
38 #ifndef NCBI_C_TOOLKIT
39 #define NCBI_C_TOOLKIT
40 #endif
41 
42 #include <objseq.h>
43 #include <algo/blast/core/blast_def.h>
44 #include <algo/blast/core/blast_query_info.h>
45 #include <algo/blast/core/blast_options.h>
46 
47 /** @addtogroup CToolkitAlgoBlast
48  *
49  * @{
50  */
51 
52 /** Detect duplicate IDs within a list of query sequences
53  * @param query_seqlocs The list of query sequences [in]
54  * @return TRUE if two or more sequences have duplicate IDs,
55  *          FALSE if all IDs are unique
56  */
57 Boolean
58 BlastSeqlocsHaveDuplicateIDs(SeqLoc* query_seqlocs);
59 
60 /** Convert a BlastMaskLoc list to a list of SeqLocs, used for formatting
61  * BLAST results.
62  * @param program_number identifies blastn, blastp, etc. [in]
63  * @param mask_loc internal mask structure [in]
64  * @param query_loc SeqLoc of query [in]
65  * @return Pointer to SeqLoc
66  */
67 SeqLocPtr
68 BlastMaskLocToSeqLoc(EBlastProgramType program_number,
69                      const BlastMaskLoc* mask_loc,
70                      SeqLoc* query_loc);
71 /** Convert a list of mask locations in a form of SeqLoc into a BlastMaskLoc
72  * structure. In case of multiple queries, it is not required to create a mask
73  * SeqLoc for every query.
74  * @param program_number identifies blastn, blastp, etc. [in]
75  * @param mask_locs Masking locations [in]
76  * @param seq_locs Sequence locations [in]
77  * @return Allocated and populated BlastMaskLoc structure.
78  */
79 BlastMaskLoc*
80 BlastMaskLocFromSeqLoc(SeqLoc* mask_locs, SeqLoc* seq_locs,
81                        EBlastProgramType program_number);
82 
83 /** Frees a special type of SeqLoc list, used in BLAST for masking locations.
84  * @param mask_loc Input list of mask SeqLocs [in]
85  * @return NULL
86  */
87 SeqLoc*
88 Blast_ValNodeMaskListFree(SeqLoc* mask_loc);
89 
90 /** Given a list of query SeqLoc's, create the sequence block and the query
91  * info structure. This is the last time SeqLoc is needed before formatting.
92  * @param query_slp List of query SeqLoc's [in]
93  * @param query_options Query setup options, containing genetic code for
94  *                      translation [in]
95  * @param program_number Type of BLAST program [in]
96  * @param masking_locs Masking locations, e.g. from lower case of repeats
97  *                     filtering. [in]
98  * @param query_info Query information structure, containing offsets into
99  *                   the concatenated sequence [out]
100  * @param query_blk Query block, containing (concatenated) sequence [out]
101  */
102 Int2 BLAST_SetUpQuery(EBlastProgramType program_number, SeqLocPtr query_slp,
103         const QuerySetUpOptions* query_options, SeqLoc* masking_locs,
104         BlastQueryInfo** query_info, BLAST_SequenceBlk* *query_blk);
105 
106 /** Set up the subject sequence block in case of two sequences BLAST.
107  * @param program_number Type of BLAST program [in]
108  * @param subject_slp SeqLoc for the subject sequence [in]
109  * @param subject Subject sequence block [out]
110  */
111 Int2 BLAST_SetUpSubject(EBlastProgramType program_number,
112         SeqLocPtr subject_slp, BLAST_SequenceBlk** subject);
113 
114 /** Find a genetic code string in ncbistdaa encoding, given an integer
115  *  genetic code value.
116  * @param gc genetic code value [in]
117  * @param genetic_code genetic code string [out]
118  */
119 Int2 BLAST_GeneticCodeFind(Int4 gc, Uint1** genetic_code);
120 
121 /* @} */
122 
123 #ifdef __cplusplus
124 }
125 #endif
126 
127 #endif /* !__BLAST_SEQ__ */
128