/* $Id: na_ungapped.h 504861 2016-06-20 15:45:40Z boratyng $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Ilya Dondoshansky
 *
 */

/** @file na_ungapped.h
 * Nucleotide ungapped extension code.
 */

34 #ifndef ALGO_BLAST_CORE__NA_UNGAPPED__H
35 #define ALGO_BLAST_CORE__NA_UNGAPPED__H
36 
37 #include <algo/blast/core/ncbi_std.h>
38 #include <algo/blast/core/blast_def.h>
39 #include <algo/blast/core/blast_extend.h>
40 #include <algo/blast/core/blast_parameters.h>
41 #include <algo/blast/core/blast_query_info.h>
42 #include <algo/blast/core/lookup_wrap.h>
43 #include <algo/blast/core/blast_hits.h>
44 #include <algo/blast/core/blast_diagnostics.h>
45 
46 #include <algo/blast/core/blast_gapalign.h>
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
52 /** Signature of function used to compute ungapped alignments */
53 typedef Int4 (*TNaExtendFunction)(const BlastOffsetPair* offset_pairs,
54                     Int4 num_hits,
55                     const BlastInitialWordParameters* word_params,
56                     LookupTableWrap* lookup_wrap,
57                     BLAST_SequenceBlk* query, BLAST_SequenceBlk* subject,
58                     Int4** matrix, BlastQueryInfo* query_info,
59                     Blast_ExtendWord* ewp,
60                     BlastInitHitList* init_hitlist,
61                     Int4 range);
62 
63 /** Find all words for a given subject sequence and perform
64  * ungapped extensions, assuming ordinary blastn.
65  * @param subject The subject sequence [in]
66  * @param query The query sequence (needed only for the discontiguous word
67  *        case) [in]
68  * @param query_info concatenated query information [in]
69  * @param lookup_wrap Pointer to the (wrapper) lookup table structure. Only
70  *        traditional BLASTn lookup table supported. [in]
71  * @param matrix The scoring matrix [in]
72  * @param word_params Parameters for the initial word extension [in]
73  * @param ewp Structure needed for initial word information maintenance [in]
74  * @param offset_pairs Array for storing query and subject offsets. [in]
75  * @param max_hits size of offset arrays [in]
76  * @param init_hitlist Structure to hold all hits information. Has to be
77  *        allocated up front [out]
78  * @param ungapped_stats Various hit counts. Not filled if NULL [out]
79  */
80 NCBI_XBLAST_EXPORT
81 Int2 BlastNaWordFinder(BLAST_SequenceBlk* subject,
82                        BLAST_SequenceBlk* query,
83                        BlastQueryInfo* query_info,
84                        LookupTableWrap* lookup_wrap,
85                        Int4** matrix,
86                        const BlastInitialWordParameters* word_params,
87                        Blast_ExtendWord* ewp,
88                        BlastOffsetPair* offset_pairs,
89                        Int4 max_hits,
90                        BlastInitHitList* init_hitlist,
91                        BlastUngappedStats* ungapped_stats);
92 
93 
94 /** Choose the best routine to use for creating ungapped alignments
95  * @param lookup_wrap Lookup table that influences routine choice [in][out]
96  */
97 NCBI_XBLAST_EXPORT
98 void BlastChooseNaExtend(LookupTableWrap *lookup_wrap);
99 
100 
101 /* A structure to hold several lists of word hits for groups of queries */
102 typedef struct MapperWordHits
103 {
104     BlastOffsetPair** pair_arrays; /**< lists of word hits */
105     Int4* num;                     /**< number of hits in the list */
106     Int4 num_arrays;               /**< number of pair_arrays */
107     Int4 array_size;               /**< size of each array */
108     Int4* last_diag;               /**< diagnal for the last word hit for each
109                                         query context */
110     Int4* last_pos;                /**< subject position for the last word hit
111                                         for each query context */
112 
113     Int4 divisor;                  /**< divisor used to find pair_arrays index
114                                         based on query offset */
115 } MapperWordHits;
116 
117 MapperWordHits* MapperWordHitsFree(MapperWordHits* wh);
118 MapperWordHits* MapperWordHitsNew(const BLAST_SequenceBlk* query,
119                                   const BlastQueryInfo* query_info);
120 
121 
122 NCBI_XBLAST_EXPORT
123 Int2
124 JumperNaWordFinder(BLAST_SequenceBlk * subject,
125                    BLAST_SequenceBlk * query,
126                    BlastQueryInfo * query_info,
127                    LookupTableWrap * lookup_wrap,
128                    const BlastInitialWordParameters * word_params,
129                    const BlastScoringParameters* score_params,
130                    const BlastHitSavingParameters* hit_params,
131                    BlastOffsetPair * offset_pairs,
132                    MapperWordHits* word_hits,
133                    Int4 max_hits,
134                    BlastGapAlignStruct* gap_align,
135                    BlastInitHitList* init_hitlist,
136                    BlastHSPList** hsp_list_ptr,
137                    BlastUngappedStats * ungapped_stats,
138                    BlastGappedStats* gapped_stats);
139 
140 
141 NCBI_XBLAST_EXPORT
142 Int2 ShortRead_IndexedWordFinder(
143         BLAST_SequenceBlk * subject,
144         BLAST_SequenceBlk * query,
145         BlastQueryInfo * query_info,
146         LookupTableWrap * lookup_wrap,
147         const BlastInitialWordParameters * word_params,
148         const BlastScoringParameters* score_params,
149         const BlastHitSavingParameters* hit_params,
150         BlastOffsetPair * offset_pairs,
151         MapperWordHits* word_hits,
152         Int4 max_hits,
153         BlastGapAlignStruct* gap_align,
154         BlastInitHitList* init_hitlist,
155         BlastHSPList** hsp_list,
156         BlastUngappedStats* ungapped_stats,
157         BlastGappedStats* gapped_stats);
158 
159 
160 
161 #ifdef __cplusplus
162 }
163 #endif
164 #endif /* !ALGO_BLAST_CORE__NA_UNGAPPED__H */
165