1 /****************************************************************\
2 *                                                                *
3 *  Library for HSP sets (high-scoring segment pairs)             *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 /* Version 0.4
17  */
18 
19 #ifndef INCLUDED_HSPSET_H
20 #define INCLUDED_HSPSET_H
21 
22 #ifdef __cplusplus
23 extern "C" {
24 #endif /* __cplusplus */
25 
26 #include <glib.h>
27 
28 #include "match.h"
29 #include "argument.h"
30 #include "recyclebin.h"
31 #include "pqueue.h"
32 #include "matrix.h"
33 #include "wordhood.h"
34 #include "recyclebin.h"
35 #include "threadref.h"
36 
37 typedef struct {
38         gint filter_threshold;
39     gboolean use_wordhood_dropoff;
40         gint seed_repeat;
41     /**/
42         gint dna_wordlen;
43         gint protein_wordlen;
44         gint codon_wordlen;
45     /**/
46         gint dna_hsp_dropoff;
47         gint protein_hsp_dropoff;
48         gint codon_hsp_dropoff;
49     /**/
50         gint dna_hsp_threshold;
51         gint protein_hsp_threshold;
52         gint codon_hsp_threshold;
53     /**/
54         gint dna_word_limit;
55         gint protein_word_limit;
56         gint codon_word_limit;
57     /**/
58         gint geneseed_threshold;
59         gint geneseed_repeat;
60     /**/
61 } HSPset_ArgumentSet;
62 
63 HSPset_ArgumentSet *HSPset_ArgumentSet_create(Argument *arg);
64 
65 /**/
66 
67 typedef struct {
68         guint  query_start;
69         guint  target_start;
70         guint  length;   /* Length is number of match state visits */
71   Match_Score  score;
72         guint  cobs;     /* cobs == Centre Offset By Score         */
73 struct HSPset *hsp_set;  /* Never included in hsp_set->ref_count   */
74 } HSP;
75 /* FIXME: remove hsp_set from HSP to save space ? */
76 
77 void HSP_destroy(HSP *hsp);
78 
/* Accessor macros for HSP and HSPset.
 *
 * All of these are plain textual macros: the `hsp` argument (and in
 * the derived macros the advance lookup) is expanded more than once,
 * so never pass an expression with side effects.
 *
 * `hsp` must point to an HSP whose hsp_set, hsp_set->param and
 * hsp_set->param->match pointers are all valid.
 */

/* Per-position advance of the query / target side of the match. */
#define HSP_query_advance(hsp) \
    ((hsp)->hsp_set->param->match->query->advance)

#define HSP_target_advance(hsp) \
    ((hsp)->hsp_set->param->match->target->advance)

/* End coordinate: start + length scaled by the advance. */
#define HSP_query_end(hsp)   \
    ((hsp)->query_start      \
   + ((hsp)->length * HSP_query_advance(hsp)))

#define HSP_target_end(hsp)  \
    ((hsp)->target_start     \
   + ((hsp)->length * HSP_target_advance(hsp)))

/* cobs coordinate: start + cobs scaled by the advance. */
#define HSP_query_cobs(hsp)  \
    ((hsp)->query_start      \
   + ((hsp)->cobs * HSP_query_advance(hsp)))

#define HSP_target_cobs(hsp) \
    ((hsp)->target_start     \
   + ((hsp)->cobs * HSP_target_advance(hsp)))

/* Diagonal identifier.
 * advance_{query,target} are swapped for position on diagonal.
 */
#define HSP_diagonal(hsp)                             \
    (((hsp)->target_start * HSP_query_advance(hsp))   \
   - ((hsp)->query_start  * HSP_target_advance(hsp)))

/* Call the match's score_func for one (query_pos, target_pos) pair. */
#define HSP_get_score(hsp, query_pos, target_pos)     \
    ((hsp)->hsp_set->param->match->score_func(        \
        (hsp)->hsp_set->param->match,                 \
        (hsp)->hsp_set->query, (hsp)->hsp_set->target, \
        (query_pos), (target_pos)))

/* Call the match's display_func for one position pair, passing
 * `display_str` through as the final argument.
 */
#define HSP_get_display(hsp, query_pos, target_pos, display_str) \
    ((hsp)->hsp_set->param->match->display_func(                 \
        (hsp)->hsp_set->param->match,                            \
        (hsp)->hsp_set->query, (hsp)->hsp_set->target,           \
        (query_pos), (target_pos), (display_str)))

/* Call the query / target side's mask_func at the given position. */
#define HSP_query_masked(hsp, query_pos)               \
    ((hsp)->hsp_set->param->match->query->mask_func(   \
        (hsp)->hsp_set->param->match->query,           \
        (hsp)->hsp_set->query, (query_pos)))

#define HSP_target_masked(hsp, target_pos)             \
    ((hsp)->hsp_set->param->match->target->mask_func(  \
        (hsp)->hsp_set->param->match->target,          \
        (hsp)->hsp_set->target, (target_pos)))

/* Call the query / target side's self_func at the given position. */
#define HSP_query_self(hsp, query_pos)                 \
    ((hsp)->hsp_set->param->match->query->self_func(   \
        (hsp)->hsp_set->param->match->query,           \
        (hsp)->hsp_set->query, (query_pos)))

#define HSP_target_self(hsp, target_pos)               \
    ((hsp)->hsp_set->param->match->target->self_func(  \
        (hsp)->hsp_set->param->match->target,          \
        (hsp)->hsp_set->target, (target_pos)))

/* Read the is_empty flag of an HSPset. */
#define HSPset_is_empty(hspset) ((hspset)->is_empty)
139 
140 typedef struct HSP_Param {
141          ThreadRef  *thread_ref;
142 HSPset_ArgumentSet  *has;
143              Match  *match;
144               gint   wordlen;
145               gint   seedlen;
146        Match_Score   dropoff;
147        Match_Score   threshold;
148        Match_Score   wordlimit;
149           WordHood  *wordhood;
150           gboolean   use_horizon;
151               gint   seed_repeat;
152         RecycleBin  *hsp_recycle;
153 #ifdef USE_PTHREADS
154    pthread_mutex_t   hsp_recycle_lock;
155 #endif /* USE_PTHREADS */
156 } HSP_Param;
157 
158 HSP_Param *HSP_Param_create(Match *match, gboolean use_horizon);
159      void  HSP_Param_destroy(HSP_Param *hsp_param);
160 HSP_Param *HSP_Param_share(HSP_Param *hsp_param);
161 HSP_Param *HSP_Param_swap(HSP_Param *hsp_param);
162 
163      void  HSP_Param_set_wordlen(HSP_Param *hsp_param, gint wordlen);
164      /**/
165      void  HSP_Param_set_dna_hsp_threshold(HSP_Param *hsp_param,
166                                            gint dna_hsp_threshold);
167      void  HSP_Param_set_protein_hsp_threshold(HSP_Param *hsp_param,
168                                                gint protein_hsp_threshold);
169      void  HSP_Param_set_codon_hsp_threshold(HSP_Param *hsp_param,
170                                              gint protein_hsp_threshold);
171      /**/
172      void  HSP_Param_set_dna_word_limit(HSP_Param *hsp_param,
173                                         gint dna_word_limit);
174      void  HSP_Param_set_protein_word_limit(HSP_Param *hsp_param,
175                                             gint protein_word_limit);
176      void  HSP_Param_set_codon_word_limit(HSP_Param *hsp_param,
177                                           gint codon_word_limit);
178      /**/
179      void  HSP_Param_set_dna_hsp_dropoff(HSP_Param *hsp_param,
180                                          gint dna_hsp_dropoff);
181      void  HSP_Param_set_protein_hsp_dropoff(HSP_Param *hsp_param,
182                                              gint dna_hsp_dropoff);
183      void  HSP_Param_set_codon_hsp_dropoff(HSP_Param *hsp_param,
184                                            gint dna_hsp_dropoff);
185      /**/
186      void  HSP_Param_set_hsp_threshold(HSP_Param *hsp_param,
187                                        gint hsp_threshold);
188      void  HSP_Param_set_seed_repeat(HSP_Param *hsp_param,
189                                      gint seed_repeat);
190 
191 typedef struct HSPset {
192               gint    ref_count;
193           Sequence    *query;
194           Sequence    *target;
195          HSP_Param    *param;
196               gint ****horizon;
197          GPtrArray    *hsp_list;
198           /**/
199           gboolean     is_finalised;
200             PQueue   **filter;
201           gboolean     is_empty;
202          PQueueSet    *pqueue_set;
203 } HSPset;
204 
205 HSPset *HSPset_create(Sequence *query, Sequence *target,
206                       HSP_Param *hsp_param);
207 HSPset *HSPset_share(HSPset *hsp_set);
208   void  HSPset_destroy(HSPset *hsp_set);
209   void  HSPset_swap(HSPset *hsp_set, HSP_Param *hsp_param);
210   void  HSPset_revcomp(HSPset *hsp_set);
211   void  HSPset_seed_hsp(HSPset *hsp_set,
212                         guint query_start, guint target_start);
213   void  HSPset_add_known_hsp(HSPset *hsp_set,
214                              guint query_start, guint target_start,
215                              guint length);
216   void  HSPset_seed_all_hsps(HSPset *hsp_set,
217                         guint *seed_list, guint seed_list_len);
218 /* HSPset_seed_all_hsps() can only be called once on the HSPset.
219  * It automatically finalises the HSPset
220  * position_list should contain (qpos,tpos) pairs,
221  * and may be sorted in place.
222  */
223 
224 HSPset *HSPset_finalise(HSPset *hsp_set);
225 
226 void HSP_print(HSP *hsp, gchar *name);
227 void HSPset_print(HSPset *hsp_set);
228 
229 void HSPset_filter_ungapped(HSPset *hsp_set);
230 /* Remove HSPs which overlap by more than 50% of the score.
231  * This is used for 3:3 ungapped alignments.
232  */
233 
234 /**/
235 
236 typedef struct HSPset_SList_Node {
237     struct HSPset_SList_Node *next;
238                         gint  query_pos;
239                         gint  target_pos;
240 } HSPset_SList_Node;
241 
242 RecycleBin *HSPset_SList_RecycleBin_create(void);
243 
244 HSPset_SList_Node *HSPset_SList_append(RecycleBin *recycle_bin,
245                                        HSPset_SList_Node *next,
246                                        gint query_pos, gint target_pos);
247 void HSPset_seed_all_qy_sorted(HSPset *hsp_set, HSPset_SList_Node *seed_list);
248 
249 /**/
250 
251 #ifdef __cplusplus
252 }
253 #endif /* __cplusplus */
254 
255 #endif /* INCLUDED_HSPSET_H */
256 
257