1 /* $Id: blast_seq.h,v 1.29 2006/11/21 17:21:20 papadopo Exp $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's offical duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * Author: Ilya Dondoshansky 25 * ===========================================================================*/ 26 27 /** @file blast_seq.h 28 * Functions converting from SeqLocs to structures used in BLAST and back. 29 */ 30 31 #ifndef __BLAST_SEQ__ 32 #define __BLAST_SEQ__ 33 34 #ifdef __cplusplus 35 extern "C" { 36 #endif 37 38 #ifndef NCBI_C_TOOLKIT 39 #define NCBI_C_TOOLKIT 40 #endif 41 42 #include <objseq.h> 43 #include <algo/blast/core/blast_def.h> 44 #include <algo/blast/core/blast_query_info.h> 45 #include <algo/blast/core/blast_options.h> 46 47 /** @addtogroup CToolkitAlgoBlast 48 * 49 * @{ 50 */ 51 52 /** Detect duplicate IDs within a list of query sequences 53 * @param query_seqlocs The list of query sequences [in] 54 * @return TRUE if two or more sequences have duplicate IDs, 55 * FALSE if all IDs are unique 56 */ 57 Boolean 58 BlastSeqlocsHaveDuplicateIDs(SeqLoc* query_seqlocs); 59 60 /** Convert a BlastMaskLoc list to a list of SeqLocs, used for formatting 61 * BLAST results. 62 * @param program_number identifies blastn, blastp, etc. [in] 63 * @param mask_loc internal mask structure [in] 64 * @param query_loc SeqLoc of query [in] 65 * @return Pointer to SeqLoc 66 */ 67 SeqLocPtr 68 BlastMaskLocToSeqLoc(EBlastProgramType program_number, 69 const BlastMaskLoc* mask_loc, 70 SeqLoc* query_loc); 71 /** Convert a list of mask locations in a form of SeqLoc into a BlastMaskLoc 72 * structure. In case of multiple queries, it is not required to create a mask 73 * SeqLoc for every query. 74 * @param program_number identifies blastn, blastp, etc. [in] 75 * @param mask_locs Masking locations [in] 76 * @param seq_locs Sequence locations [in] 77 * @return Allocated and populated BlastMaskLoc structure. 78 */ 79 BlastMaskLoc* 80 BlastMaskLocFromSeqLoc(SeqLoc* mask_locs, SeqLoc* seq_locs, 81 EBlastProgramType program_number); 82 83 /** Frees a special type of SeqLoc list, used in BLAST for masking locations. 84 * @param mask_loc Input list of mask SeqLocs [in] 85 * @return NULL 86 */ 87 SeqLoc* 88 Blast_ValNodeMaskListFree(SeqLoc* mask_loc); 89 90 /** Given a list of query SeqLoc's, create the sequence block and the query 91 * info structure. This is the last time SeqLoc is needed before formatting. 92 * @param query_slp List of query SeqLoc's [in] 93 * @param query_options Query setup options, containing genetic code for 94 * translation [in] 95 * @param program_number Type of BLAST program [in] 96 * @param masking_locs Masking locations, e.g. from lower case of repeats 97 * filtering. [in] 98 * @param query_info Query information structure, containing offsets into 99 * the concatenated sequence [out] 100 * @param query_blk Query block, containing (concatenated) sequence [out] 101 */ 102 Int2 BLAST_SetUpQuery(EBlastProgramType program_number, SeqLocPtr query_slp, 103 const QuerySetUpOptions* query_options, SeqLoc* masking_locs, 104 BlastQueryInfo** query_info, BLAST_SequenceBlk* *query_blk); 105 106 /** Set up the subject sequence block in case of two sequences BLAST. 107 * @param program_number Type of BLAST program [in] 108 * @param subject_slp SeqLoc for the subject sequence [in] 109 * @param subject Subject sequence block [out] 110 */ 111 Int2 BLAST_SetUpSubject(EBlastProgramType program_number, 112 SeqLocPtr subject_slp, BLAST_SequenceBlk** subject); 113 114 /** Find a genetic code string in ncbistdaa encoding, given an integer 115 * genetic code value. 116 * @param gc genetic code value [in] 117 * @param genetic_code genetic code string [out] 118 */ 119 Int2 BLAST_GeneticCodeFind(Int4 gc, Uint1** genetic_code); 120 121 /* @} */ 122 123 #ifdef __cplusplus 124 } 125 #endif 126 127 #endif /* !__BLAST_SEQ__ */ 128