1 /* $Id: blast_input.h,v 1.20 2007/03/12 16:12:46 madden Exp $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Ilya Dondoshansky
27 *
28 */
29 
30 /** @file blast_input.h
31  * Reading FASTA sequences for BLAST
32  */
33 
34 #ifndef __BLAST_INPUT__
35 #define __BLAST_INPUT__
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
41 #ifndef NCBI_C_TOOLKIT
42 #define NCBI_C_TOOLKIT
43 #endif
44 
45 #include <ncbi.h>
46 #include <algo/blast/core/blast_def.h>
47 #include <algo/blast/core/blast_message.h>
48 
49 /** @addtogroup CToolkitAlgoBlast
50  *
51  * @{
52  */
53 
54 /** Read the query sequences from a file, return a SeqLoc list.
55  * @param infp The input file [in]
56  * @param query_is_na Are sequences nucleotide (or protein)? [in]
57  * @param strand Which strands should SeqLocs contain (0 for protein,
58  *               1 for plus, 2 for minus, 3 for both)? [in]
59  * @param max_total_length Maximal total length of query sequences to be returned [in]
60  * @param from Starting offset in query location [in]
61  * @param to Ending offset in query location (-1 for end of sequence) [in]
62  * @param lcase_mask The lower case masking locations (no lower case masking
63  *                   if NULL) [out]
64  * @param query_slp List of query SeqLocs [out]
65  * @param ctr Number from which to start counting local ids, will be
66  *   incremented by number of queries read in  [in|out]
67  * @param num_queries Number of sequences read [out]
68  * @param believe_query parse FASTA seqid if TRUE [in]
69  * @param genetic_code Genetic code to use for this query's translation, if
70  *                     it is nucleotide [in]
71  * @return number of letters read, negative number on error.
72  */
73 Int4
74 BLAST_GetQuerySeqLoc(FILE *infp, Boolean query_is_na, Uint1 strand,
75                      Int4 max_total_length, Int4 from, Int4 to,
76                      SeqLoc** lcase_mask, SeqLocPtr* query_slp, Int4Ptr ctr,
77                      Int4* num_queries, Boolean believe_query,
78                      Int4 genetic_code);
79 
80 
81 /** The possible file formats of a PSI-BLAST checkpoint file. */
82 typedef enum EPsiCheckpointType {
83     eStandardCheckpoint = 0,    /**< The usual PSI-BLAST binary format */
84     eAsnTextCheckpoint = 1,     /**< ASN.1 text format */
85     eAsnBinaryCheckpoint = 2    /**< ASN.1 binary format */
86 } EPsiCheckpointType;
87 
88 /** The location and type of a PSI-BLAST checkpoint file. */
89 typedef struct Blast_PsiCheckpointLoc {
90     EPsiCheckpointType checkpoint_type; /**< format of the checkpoint file */
91     char * filename;                    /**< name of the checkpoint file */
92 } Blast_PsiCheckpointLoc;
93 
94 /** Create a new locator for a PSI-BLAST checkpoint file.
95  * @param checkpoint_type    file format [in]
96  * @param filename           name of the file [in] */
97 Blast_PsiCheckpointLoc *
98 Blast_PsiCheckpointLocNew(EPsiCheckpointType checkpoint_type,
99                           char * filename);
100 
101 /** Free a PSI-BLAST checkpoint file locator; psi_checkpoint is the address of the pointer to it. */
102 void
103 Blast_PsiCheckpointLocFree(Blast_PsiCheckpointLoc ** psi_checkpoint);
104 
105 
106 /**
107  * Read frequency ratios from a PSI-BLAST checkpoint file.
108  *
109  * @param freq_ratios     the frequency ratios [out]
110  * @param query_length    the length of the query, and second dimension of
111  *                        freq_ratios [in]
112  * @param query           query sequence data [in]
113  * @param psi_checkpoint  location of the checkpoint data [in]
114  * @param blast_msg       a pointer to hold BLAST warnings [out]
115  *
116  * @return 0 on success, nonzero otherwise
117  */
118 int
119 Blast_PosReadCheckpoint(double ** freq_ratios,
120                         int query_length,
121                         const Uint1 * query,
122                         Blast_PsiCheckpointLoc * psi_checkpoint,
123                         Blast_Message* *blast_msg);
124 
125 
126 /* @} */
127 
128 #ifdef __cplusplus
129 }
130 #endif
131 
132 /*
133 * ===========================================================================
134 *
135 * $Log: blast_input.h,v $
136 * Revision 1.20  2007/03/12 16:12:46  madden
137 *    - Create an enum EPsiCheckpointType that specifies the file format
138 *      of a PSI-BLAST checkpoint file.
139 *    - Define a new datatype Blast_PsiCheckpointLoc to
140 *      specify the location and type of a PSI-BLAST checkpoint file.
141 *    - Declare Blast_PsiCheckpointLocNew and Blast_PsiCheckpointLocFree.
142 *    [from Mike Gertz]
143 *
144 * Revision 1.19  2007/03/05 14:50:08  camacho
145 * - Added a prototype for Blast_PosReadCheckpoint.
146 * - Added core/blast_message.h to the includes because
147 *   Blast_PosReadCheckpoint has a Blast_Message ** parameter.
148 *
149 * Revision 1.18  2006/04/21 14:33:44  madden
150 * BLAST_GetQuerySeqLoc parameter ctr is now a pointer to Int4, fixes case of more than 32k queries
151 *
152 * Revision 1.17  2005/08/08 15:51:41  dondosha
153 * Added genetic code argument to BLAST_GetQuerySeqLoc, to save in the created Bioseqs
154 *
155 * Revision 1.16  2005/04/06 23:27:53  dondosha
156 * Doxygen fixes
157 *
158 * Revision 1.15  2005/02/09 20:55:38  dondosha
159 * Changed doxygen group from AlgoBlast, which is reserved for C++ toolkit, to CToolkitAlgoBlast
160 *
161 * Revision 1.14  2005/02/02 18:57:21  dondosha
162 * Pass back lower case mask in a SeqLoc form; removed unused function
163 *
164 *
165 * ===========================================================================
166 */
167 
168 #endif /* !__BLAST_INPUT__ */
169