1 /* $Id: blast_gapalign.h 504861 2016-06-20 15:45:40Z boratyng $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Ilya Dondoshansky 27 * 28 */ 29 30 /** @file blast_gapalign.h 31 * Structures and functions prototypes used for BLAST gapped extension 32 * @todo FIXME: elaborate on contents. 33 */ 34 35 #ifndef ALGO_BLAST_CORE__BLAST_GAPALIGN__H 36 #define ALGO_BLAST_CORE__BLAST_GAPALIGN__H 37 38 #include <algo/blast/core/ncbi_std.h> 39 #include <algo/blast/core/blast_export.h> 40 #include <algo/blast/core/blast_def.h> 41 #include <algo/blast/core/blast_extend.h> 42 #include <algo/blast/core/blast_query_info.h> 43 #include <algo/blast/core/blast_parameters.h> 44 #include <algo/blast/core/gapinfo.h> 45 #include <algo/blast/core/greedy_align.h> 46 #include <algo/blast/core/blast_hits.h> 47 #include <algo/blast/core/blast_diagnostics.h> 48 49 #ifdef __cplusplus 50 extern "C" { 51 #endif 52 53 /** Split subject sequences if longer than this */ 54 #define MAX_DBSEQ_LEN 5000000 55 56 /** Auxiliary structure for dynamic programming gapped extension */ 57 typedef struct { 58 Int4 best; /**< score of best path that ends in a match 59 at this position */ 60 Int4 best_gap; /**< score of best path that ends in a gap 61 at this position */ 62 } BlastGapDP; 63 64 65 typedef struct JumperGapAlign JumperGapAlign; 66 67 68 /** Structure supporting the gapped alignment */ 69 typedef struct BlastGapAlignStruct { 70 Boolean positionBased; /**< Is this PSI-BLAST? */ 71 GapStateArrayStruct* state_struct; /**< Structure to keep extension 72 state information */ 73 GapEditScript* edit_script; /**< The traceback (gap) information */ 74 GapPrelimEditBlock *fwd_prelim_tback; /**< traceback from right extensions */ 75 GapPrelimEditBlock *rev_prelim_tback; /**< traceback from left extensions */ 76 SGreedyAlignMem* greedy_align_mem;/**< Preallocated memory for the greedy 77 gapped extension */ 78 BlastGapDP* dp_mem; /**< scratch structures for dynamic programming */ 79 Int4 dp_mem_alloc; /**< current number of structures allocated */ 80 BlastScoreBlk* sbp; /**< Pointer to the scoring information block */ 81 Int4 gap_x_dropoff; /**< X-dropoff parameter to use */ 82 Int4 max_mismatches; /**< Max number of mismatches for jumper */ 83 Int4 mismatch_window; /**< Window sie for mismatches for jumper */ 84 Int4 query_start; /**< query start offset of current alignment */ 85 Int4 query_stop; /**< query end offseet of current alignment */ 86 Int4 subject_start; /**< subject start offset current alignment */ 87 Int4 subject_stop; /**< subject end offset of current alignment */ 88 Int4 greedy_query_seed_start; /**< for greedy alignments, the query 89 offset of the gapped start point */ 90 Int4 greedy_subject_seed_start; /**< for greedy alignments, the subject 91 offset of the gapped start point */ 92 Int4 score; /**< Return value: alignment score */ 93 94 JumperGapAlign* jumper; /**< data for jumper alignment */ 95 } BlastGapAlignStruct; 96 97 /** Initializes the BlastGapAlignStruct structure 98 * @param score_params Parameters related to scoring alignments [in] 99 * @param ext_params parameters related to gapped extension [in] 100 * @param max_subject_length Maximum length of any subject sequence (needed 101 * for greedy extension allocation only) [in] 102 * @param sbp The scoring information block [in] 103 * @param gap_align_ptr The BlastGapAlignStruct structure [out] 104 */ 105 NCBI_XBLAST_EXPORT 106 Int2 107 BLAST_GapAlignStructNew(const BlastScoringParameters* score_params, 108 const BlastExtensionParameters* ext_params, 109 Uint4 max_subject_length, BlastScoreBlk* sbp, 110 BlastGapAlignStruct** gap_align_ptr); 111 112 /** Deallocates memory in the BlastGapAlignStruct structure */ 113 NCBI_XBLAST_EXPORT 114 BlastGapAlignStruct* 115 BLAST_GapAlignStructFree(BlastGapAlignStruct* gap_align); 116 117 /** Performs gapped extension for all non-Mega BLAST programs, given 118 * that ungapped extension has been done earlier. 119 * Sorts initial HSPs by score (from ungapped extension); 120 * Deletes HSPs that are included in already extended HSPs; 121 * Performs gapped extension; 122 * Saves HSPs into an HSP list. 123 * @param program_number Type of BLAST program [in] 124 * @param query The query sequence block [in] 125 * @param query_info Query information structure, containing offsets into 126 * the concatenated sequence [in] 127 * @param subject The subject sequence block [in] 128 * @param gap_align The auxiliary structure for gapped alignment [in] 129 * @param score_params Options and parameters related to scoring [in] 130 * @param ext_params Options and parameters related to extensions [in] 131 * @param hit_params Options related to saving hits [in] 132 * @param init_hitlist List of initial HSPs (offset pairs with additional 133 * information from the ungapped alignment performed earlier) [in] 134 * @param hsp_list_ptr Structure containing all saved HSPs [out] 135 * @param gapped_stats Return statistics (not filled if NULL) [out] 136 * @param fence_hit True is returned here if overrun is detected. [in] 137 */ 138 NCBI_XBLAST_EXPORT 139 Int2 BLAST_GetGappedScore (EBlastProgramType program_number, 140 BLAST_SequenceBlk* query, BlastQueryInfo* query_info, 141 BLAST_SequenceBlk* subject, 142 BlastGapAlignStruct* gap_align, 143 const BlastScoringParameters* score_params, 144 const BlastExtensionParameters* ext_params, 145 const BlastHitSavingParameters* hit_params, 146 BlastInitHitList* init_hitlist, 147 BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats, 148 Boolean * fence_hit); 149 150 /** Perform a gapped alignment with traceback 151 * @param program Type of BLAST program [in] 152 * @param query The query sequence [in] 153 * @param subject The subject sequence [in] 154 * @param gap_align The gapped alignment structure [in] [out] 155 * @param score_params Scoring parameters [in] 156 * @param q_start Offset in query where to start alignment [in] 157 * @param s_start Offset in subject where to start alignment [in] 158 * @param query_length Maximal allowed extension in query [in] 159 * @param subject_length Maximal allowed extension in subject [in] 160 * @param fence_hit True is returned here if overrun is detected. [in] 161 */ 162 NCBI_XBLAST_EXPORT 163 Int2 BLAST_GappedAlignmentWithTraceback(EBlastProgramType program, 164 const Uint1* query, const Uint1* subject, 165 BlastGapAlignStruct* gap_align, 166 const BlastScoringParameters* score_params, 167 Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, 168 Boolean * fence_hit); 169 170 /** Greedy gapped alignment, with or without traceback. 171 * Given two sequences, relevant options and an offset pair, fills the 172 * gap_align structure with alignment endpoints and, if traceback is 173 * performed, gap information. 174 * @param query The query sequence [in] 175 * @param subject The subject sequence [in] 176 * @param query_length The query sequence length [in] 177 * @param subject_length The subject sequence length [in] 178 * @param gap_align The structure holding various information and memory 179 * needed for gapped alignment [in] [out] 180 * @param score_params Parameters related to scoring alignments [in] 181 * @param q_off Starting offset in query [in] 182 * @param s_off Starting offset in subject [in] 183 * @param compressed_subject Is subject sequence compressed? [in] 184 * @param do_traceback Should traceback be saved? [in] 185 * @param fence_hit True is returned here if overrun is detected. [in] 186 */ 187 NCBI_XBLAST_EXPORT 188 Int2 189 BLAST_GreedyGappedAlignment(const Uint1* query, const Uint1* subject, 190 Int4 query_length, Int4 subject_length, BlastGapAlignStruct* gap_align, 191 const BlastScoringParameters* score_params, 192 Int4 q_off, Int4 s_off, Boolean compressed_subject, Boolean do_traceback, 193 Boolean * fence_hit); 194 195 /** Convert initial HSP list to an HSP list: to be used in ungapped search. 196 * Ungapped data must be available in the initial HSP list for this function 197 * to work. 198 * @param init_hitlist List of initial HSPs with ungapped extension 199 * information [in] 200 * @param query_info Query information structure, containing offsets into 201 * the concatenated queries/strands/frames [in] 202 * @param subject Subject sequence block containing frame information [in] 203 * @param hit_options Hit saving options [in] 204 * @param hsp_list_ptr HSPs in the final form [out] 205 */ 206 NCBI_XBLAST_EXPORT 207 Int2 BLAST_GetUngappedHSPList(BlastInitHitList* init_hitlist, 208 BlastQueryInfo* query_info, BLAST_SequenceBlk* subject, 209 const BlastHitSavingOptions* hit_options, 210 BlastHSPList** hsp_list_ptr); 211 212 /** Adjusts range of subject sequence to be passed for gapped extension, 213 * taking into account the length and starting position of the alignment in 214 * query. 215 * @param subject_offset_ptr Start of the subject range [out] 216 * @param subject_length_ptr Length of the subject range [out] 217 * @param query_offset Offset in query from which alignment starts [in] 218 * @param query_length Length of the query sequence [in] 219 * @param start_shift The offset by which the output range is shifted with 220 * respect to the full subject sequence [out] 221 */ 222 NCBI_XBLAST_EXPORT 223 void 224 AdjustSubjectRange(Int4* subject_offset_ptr, Int4* subject_length_ptr, 225 Int4 query_offset, Int4 query_length, Int4* start_shift); 226 227 /** Function to look for the highest scoring window (of size HSP_MAX_WINDOW) 228 * in an HSP and return the middle of this. Used by the gapped-alignment 229 * functions to start the gapped alignments. 230 * @param query The query sequence [in] 231 * @param subject The subject sequence [in] 232 * @param sbp Scoring block, containing matrix [in] 233 * @param q_start Starting offset in query [in] 234 * @param q_length Length of HSP in query [in] 235 * @param s_start Starting offset in subject [in] 236 * @param s_length Length of HSP in subject [in] 237 * @return The offset at which alignment should be started [out] 238 */ 239 NCBI_XBLAST_EXPORT 240 Int4 241 BlastGetStartForGappedAlignment (const Uint1* query, const Uint1* subject, 242 const BlastScoreBlk* sbp, Uint4 q_start, Uint4 q_length, 243 Uint4 s_start, Uint4 s_length); 244 245 /** Function to look for the longest identity match run (up to size HSP_MAX_IDENT_RUN) 246 * in an HSP and return the middle of this. Used by the gapped-alignment 247 * functions to start the gapped alignments. 248 * @param query The query sequence [in] 249 * @param subject The subject sequence [in] 250 * @param hsp On return, the gapped_start will be filled. [in][out] 251 */ 252 NCBI_XBLAST_EXPORT 253 void 254 BlastGetStartForGappedAlignmentNucl (const Uint1* query, const Uint1* subject, 255 BlastHSP* hsp); 256 257 /** Function to look for the highest scoring window (of size HSP_MAX_WINDOW) 258 * in an HSP and return the middle of this. Used by the gapped-alignment 259 * functions to start the gapped alignments. 260 * Should be used instead of BlastGetStartForGappedAlignment 261 * @param query The query sequence [in] 262 * @param subject The subject sequence [in] 263 * @param sbp Scoring block, containing matrix [in] 264 * @param hsp start and stops of HSP [in] 265 * @param q_retval query offset to use [out] 266 * @param s_retval subject offset to use [out] 267 * 268 */ 269 NCBI_XBLAST_EXPORT 270 Boolean 271 BlastGetOffsetsForGappedAlignment (const Uint1* query, const Uint1* subject, 272 const BlastScoreBlk* sbp, BlastHSP* hsp, Int4* q_retval, Int4* s_retval); 273 274 #ifdef __cplusplus 275 } 276 #endif 277 #endif /* !ALGO_BLAST_CORE__BLAST_GAPALIGN__H */ 278