1 /* $Id: blast_input.h,v 1.20 2007/03/12 16:12:46 madden Exp $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Ilya Dondoshansky 27 * 28 */ 29 30 /** @file blast_input.h 31 * Reading FASTA sequences for BLAST 32 */ 33 34 #ifndef __BLAST_INPUT__ 35 #define __BLAST_INPUT__ 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 #ifndef NCBI_C_TOOLKIT 42 #define NCBI_C_TOOLKIT 43 #endif 44 45 #include <ncbi.h> 46 #include <algo/blast/core/blast_def.h> 47 #include <algo/blast/core/blast_message.h> 48 49 /** @addtogroup CToolkitAlgoBlast 50 * 51 * @{ 52 */ 53 54 /** Read the query sequences from a file, return a SeqLoc list. 55 * @param infp The input file [in] 56 * @param query_is_na Are sequences nucleotide (or protein)? [in] 57 * @param strand Which strands should SeqLocs contain (0 for protein, 58 * 1 for plus, 2 for minus, 3 for both)? [in] 59 * @param max_total_length length of query sequences to be returned [in] 60 * @param from Starting offset in query location [in] 61 * @param to Ending offset in query location (-1 for end of sequence) [in] 62 * @param lcase_mask The lower case masking locations (no lower case masking 63 * if NULL [out] 64 * @param query_slp List of query SeqLocs [out] 65 * @param ctr Number from which to start counting local ids, will be 66 * incremented by number of queries read in [in|out] 67 * @param num_queries Number of sequences read [out] 68 * @param believe_query parse FASTA seqid if TRUE [in] 69 * @param genetic_code Genetic code to use for thie query's translation, if 70 * it is nucleotide [in] 71 * @return number of letters read, negative number on error. 72 */ 73 Int4 74 BLAST_GetQuerySeqLoc(FILE *infp, Boolean query_is_na, Uint1 strand, 75 Int4 max_total_length, Int4 from, Int4 to, 76 SeqLoc** lcase_mask, SeqLocPtr* query_slp, Int4Ptr ctr, 77 Int4* num_queries, Boolean believe_query, 78 Int4 genetic_code); 79 80 81 /** The possible file formats of a PSI-BLAST checkpoint file. */ 82 typedef enum EPsiCheckpointType { 83 eStandardCheckpoint = 0, /**< The useual PSI-BLAST binary format */ 84 eAsnTextCheckpoint = 1, /**< ASN.1 text format */ 85 eAsnBinaryCheckpoint = 2 /**< ASN.1 binary format */ 86 } EPsiCheckpointType; 87 88 /** The location and type of a PSI-BLAST checkpoint file */ 89 typedef struct Blast_PsiCheckpointLoc { 90 EPsiCheckpointType checkpoint_type; /**< file format */ 91 char * filename; /**< name of the file */ 92 } Blast_PsiCheckpointLoc; 93 94 /** Create a new locator for a PSI-BLAST checkpoint file. 95 * @param checkpoint_type file format 96 * @param filename name of the file */ 97 Blast_PsiCheckpointLoc * 98 Blast_PsiCheckpointLocNew(EPsiCheckpointType checkpoint_type, 99 char * filename); 100 101 /** Free a PSI-BLAST checkpoint file locator */ 102 void 103 Blast_PsiCheckpointLocFree(Blast_PsiCheckpointLoc ** psi_checkpoint); 104 105 106 /** 107 * Read frequency ratios from a PSI-BLAST checkpoint file. 108 * 109 * @param freq_ratios the frequency ratios 110 * @param query_length the length of the query, and second dimension of 111 * freq_ratios 112 * @param query query sequence data 113 * @param psi_checkpoint location of the checkpoint data 114 * @param blast_msg a pointer to hold BLAST warnings. 115 * 116 * @return 0 on success, nonzero otherwise 117 */ 118 int 119 Blast_PosReadCheckpoint(double ** freq_ratios, 120 int query_length, 121 const Uint1 * query, 122 Blast_PsiCheckpointLoc * psi_checkpoint, 123 Blast_Message* *blast_msg); 124 125 126 /* @} */ 127 128 #ifdef __cplusplus 129 } 130 #endif 131 132 /* 133 * =========================================================================== 134 * 135 * $Log: blast_input.h,v $ 136 * Revision 1.20 2007/03/12 16:12:46 madden 137 * - Create an enum EPsiCheckpointType that specifies the file format 138 * of a PSI-BLAST checkpoint file. 139 * - Define a new datatype Blast_PsiCheckpointLoc to 140 * specify the location and type of a PSI-BLAST checkpoint file. 141 * - Declare Blast_PsiCheckpointLocNew and Blast_PsiCheckpointLocFree. 142 * [from Mike Gertz] 143 * 144 * Revision 1.19 2007/03/05 14:50:08 camacho 145 * - Added a prototype for Blast_PosReadCheckpoint. 146 * - Added core/blast_message.h to the includes because 147 * Blast_PosReadCheckpoint has a Blast_Message ** parameter. 148 * 149 * Revision 1.18 2006/04/21 14:33:44 madden 150 * BLAST_GetQuerySeqLoc parameter ctr is now a pointer to Int4, fixes case of more than 32k queries 151 * 152 * Revision 1.17 2005/08/08 15:51:41 dondosha 153 * Added genetic code argument to BLAST_GetQuerySeqLoc, to save in the created Bioseqs 154 * 155 * Revision 1.16 2005/04/06 23:27:53 dondosha 156 * Doxygen fixes 157 * 158 * Revision 1.15 2005/02/09 20:55:38 dondosha 159 * Changed doxygen group from AlgoBlast, which is reserved for C++ toolkit, to CToolkitAlgoBlast 160 * 161 * Revision 1.14 2005/02/02 18:57:21 dondosha 162 * Pass back lower case mask in a SeqLoc form; removed unused function 163 * 164 * 165 * =========================================================================== 166 */ 167 168 #endif /* !__BLAST_INPUT__ */ 169