1 /****************************************************************\ 2 * * 3 * Library for HSP sets (high-scoring segment pairs) * 4 * * 5 * Guy St.C. Slater.. mailto:guy@ebi.ac.uk * 6 * Copyright (C) 2000-2009. All Rights Reserved. * 7 * * 8 * This source code is distributed under the terms of the * 9 * GNU General Public License, version 3. See the file COPYING * 10 * or http://www.gnu.org/licenses/gpl.txt for details * 11 * * 12 * If you use this code, please keep this notice intact. * 13 * * 14 \****************************************************************/ 15 16 /* Version 0.4 17 */ 18 19 #ifndef INCLUDED_HSPSET_H 20 #define INCLUDED_HSPSET_H 21 22 #ifdef __cplusplus 23 extern "C" { 24 #endif /* __cplusplus */ 25 26 #include <glib.h> 27 28 #include "match.h" 29 #include "argument.h" 30 #include "recyclebin.h" 31 #include "pqueue.h" 32 #include "matrix.h" 33 #include "wordhood.h" 34 #include "recyclebin.h" 35 #include "threadref.h" 36 37 typedef struct { 38 gint filter_threshold; 39 gboolean use_wordhood_dropoff; 40 gint seed_repeat; 41 /**/ 42 gint dna_wordlen; 43 gint protein_wordlen; 44 gint codon_wordlen; 45 /**/ 46 gint dna_hsp_dropoff; 47 gint protein_hsp_dropoff; 48 gint codon_hsp_dropoff; 49 /**/ 50 gint dna_hsp_threshold; 51 gint protein_hsp_threshold; 52 gint codon_hsp_threshold; 53 /**/ 54 gint dna_word_limit; 55 gint protein_word_limit; 56 gint codon_word_limit; 57 /**/ 58 gint geneseed_threshold; 59 gint geneseed_repeat; 60 /**/ 61 } HSPset_ArgumentSet; 62 63 HSPset_ArgumentSet *HSPset_ArgumentSet_create(Argument *arg); 64 65 /**/ 66 67 typedef struct { 68 guint query_start; 69 guint target_start; 70 guint length; /* Length is number of match state visits */ 71 Match_Score score; 72 guint cobs; /* cobs == Centre Offset By Score */ 73 struct HSPset *hsp_set; /* Never included in hsp_set->ref_count */ 74 } HSP; 75 /* FIXME: remove hsp_set from HSP to save space ? */ 76 77 void HSP_destroy(HSP *hsp); 78 79 #define HSP_query_advance(hsp) \ 80 ((hsp)->hsp_set->param->match->query->advance) 81 82 #define HSP_target_advance(hsp) \ 83 ((hsp)->hsp_set->param->match->target->advance) 84 85 #define HSP_query_end(hsp) \ 86 ((hsp)->query_start \ 87 + ((hsp)->length*HSP_query_advance(hsp))) 88 89 #define HSP_target_end(hsp) \ 90 ((hsp)->target_start \ 91 + ((hsp)->length*HSP_target_advance(hsp))) 92 93 #define HSP_query_cobs(hsp) \ 94 ((hsp)->query_start \ 95 +((hsp)->cobs*HSP_query_advance(hsp))) 96 97 #define HSP_target_cobs(hsp) \ 98 ((hsp)->target_start \ 99 + ((hsp)->cobs*HSP_target_advance(hsp))) 100 101 #define HSP_diagonal(hsp) \ 102 (((hsp)->target_start*HSP_query_advance(hsp)) \ 103 -((hsp)->query_start*HSP_target_advance(hsp))) 104 /* advance_{query,target} are swapped for position on diagonal */ 105 106 #define HSP_get_score(hsp, query_pos, target_pos) \ 107 ((hsp)->hsp_set->param->match->score_func( \ 108 (hsp)->hsp_set->param->match, \ 109 (hsp)->hsp_set->query, (hsp)->hsp_set->target, \ 110 (query_pos), (target_pos))) 111 112 #define HSP_get_display(hsp, query_pos, target_pos, display_str) \ 113 ((hsp)->hsp_set->param->match->display_func( \ 114 (hsp)->hsp_set->param->match, \ 115 (hsp)->hsp_set->query, (hsp)->hsp_set->target, \ 116 (query_pos), (target_pos), display_str)) 117 118 #define HSP_query_masked(hsp, query_pos) \ 119 ((hsp)->hsp_set->param->match->query->mask_func( \ 120 (hsp)->hsp_set->param->match->query, \ 121 (hsp)->hsp_set->query, (query_pos))) 122 123 #define HSP_target_masked(hsp, target_pos) \ 124 ((hsp)->hsp_set->param->match->target->mask_func( \ 125 (hsp)->hsp_set->param->match->target, \ 126 (hsp)->hsp_set->target, (target_pos))) 127 128 #define HSP_query_self(hsp, query_pos) \ 129 ((hsp)->hsp_set->param->match->query->self_func( \ 130 (hsp)->hsp_set->param->match->query, \ 131 (hsp)->hsp_set->query, (query_pos))) 132 133 #define HSP_target_self(hsp, target_pos) \ 134 ((hsp)->hsp_set->param->match->target->self_func( \ 135 (hsp)->hsp_set->param->match->target, \ 136 (hsp)->hsp_set->target, (target_pos))) 137 138 #define HSPset_is_empty(hspset) ((hspset)->is_empty) 139 140 typedef struct HSP_Param { 141 ThreadRef *thread_ref; 142 HSPset_ArgumentSet *has; 143 Match *match; 144 gint wordlen; 145 gint seedlen; 146 Match_Score dropoff; 147 Match_Score threshold; 148 Match_Score wordlimit; 149 WordHood *wordhood; 150 gboolean use_horizon; 151 gint seed_repeat; 152 RecycleBin *hsp_recycle; 153 #ifdef USE_PTHREADS 154 pthread_mutex_t hsp_recycle_lock; 155 #endif /* USE_PTHREADS */ 156 } HSP_Param; 157 158 HSP_Param *HSP_Param_create(Match *match, gboolean use_horizon); 159 void HSP_Param_destroy(HSP_Param *hsp_param); 160 HSP_Param *HSP_Param_share(HSP_Param *hsp_param); 161 HSP_Param *HSP_Param_swap(HSP_Param *hsp_param); 162 163 void HSP_Param_set_wordlen(HSP_Param *hsp_param, gint wordlen); 164 /**/ 165 void HSP_Param_set_dna_hsp_threshold(HSP_Param *hsp_param, 166 gint dna_hsp_threshold); 167 void HSP_Param_set_protein_hsp_threshold(HSP_Param *hsp_param, 168 gint protein_hsp_threshold); 169 void HSP_Param_set_codon_hsp_threshold(HSP_Param *hsp_param, 170 gint protein_hsp_threshold); 171 /**/ 172 void HSP_Param_set_dna_word_limit(HSP_Param *hsp_param, 173 gint dna_word_limit); 174 void HSP_Param_set_protein_word_limit(HSP_Param *hsp_param, 175 gint protein_word_limit); 176 void HSP_Param_set_codon_word_limit(HSP_Param *hsp_param, 177 gint codon_word_limit); 178 /**/ 179 void HSP_Param_set_dna_hsp_dropoff(HSP_Param *hsp_param, 180 gint dna_hsp_dropoff); 181 void HSP_Param_set_protein_hsp_dropoff(HSP_Param *hsp_param, 182 gint dna_hsp_dropoff); 183 void HSP_Param_set_codon_hsp_dropoff(HSP_Param *hsp_param, 184 gint dna_hsp_dropoff); 185 /**/ 186 void HSP_Param_set_hsp_threshold(HSP_Param *hsp_param, 187 gint hsp_threshold); 188 void HSP_Param_set_seed_repeat(HSP_Param *hsp_param, 189 gint seed_repeat); 190 191 typedef struct HSPset { 192 gint ref_count; 193 Sequence *query; 194 Sequence *target; 195 HSP_Param *param; 196 gint ****horizon; 197 GPtrArray *hsp_list; 198 /**/ 199 gboolean is_finalised; 200 PQueue **filter; 201 gboolean is_empty; 202 PQueueSet *pqueue_set; 203 } HSPset; 204 205 HSPset *HSPset_create(Sequence *query, Sequence *target, 206 HSP_Param *hsp_param); 207 HSPset *HSPset_share(HSPset *hsp_set); 208 void HSPset_destroy(HSPset *hsp_set); 209 void HSPset_swap(HSPset *hsp_set, HSP_Param *hsp_param); 210 void HSPset_revcomp(HSPset *hsp_set); 211 void HSPset_seed_hsp(HSPset *hsp_set, 212 guint query_start, guint target_start); 213 void HSPset_add_known_hsp(HSPset *hsp_set, 214 guint query_start, guint target_start, 215 guint length); 216 void HSPset_seed_all_hsps(HSPset *hsp_set, 217 guint *seed_list, guint seed_list_len); 218 /* HSPset_seed_all_hsps() can only be called once on the HSPset. 219 * It automatically finalises the HSPset 220 * position_list should contain (qpos,tpos) pairs, 221 * and may be sorted in place. 222 */ 223 224 HSPset *HSPset_finalise(HSPset *hsp_set); 225 226 void HSP_print(HSP *hsp, gchar *name); 227 void HSPset_print(HSPset *hsp_set); 228 229 void HSPset_filter_ungapped(HSPset *hsp_set); 230 /* Remove HSPs which overlap by more than 50% of the score. 231 * This is used for 3:3 ungapped alignments. 232 */ 233 234 /**/ 235 236 typedef struct HSPset_SList_Node { 237 struct HSPset_SList_Node *next; 238 gint query_pos; 239 gint target_pos; 240 } HSPset_SList_Node; 241 242 RecycleBin *HSPset_SList_RecycleBin_create(void); 243 244 HSPset_SList_Node *HSPset_SList_append(RecycleBin *recycle_bin, 245 HSPset_SList_Node *next, 246 gint query_pos, gint target_pos); 247 void HSPset_seed_all_qy_sorted(HSPset *hsp_set, HSPset_SList_Node *seed_list); 248 249 /**/ 250 251 #ifdef __cplusplus 252 } 253 #endif /* __cplusplus */ 254 255 #endif /* INCLUDED_HSPSET_H */ 256 257