1 /****************************************************************\
2 *                                                                *
3 *  Library for word-neighbourhood generation                     *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #ifndef INCLUDED_WORDHOOD_H
17 #define INCLUDED_WORDHOOD_H
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif /* __cplusplus */
22 
23 #include <glib.h>
24 #include <stdio.h>
25 #include <limits.h>
26 
27 #include "submat.h"
28 #include "codonsubmat.h"
29 
/* Number of distinct values a char can hold (2 to the power CHAR_BIT).
 * Used as the length of the per-symbol index tables.
 * A definition supplied before this point takes precedence.
 */
#if !defined(ALPHABETSIZE)
#define ALPHABETSIZE (1 << CHAR_BIT)
#endif /* !defined(ALPHABETSIZE) */
33 
34 typedef struct WordHood_Alphabet {
35          gint   ref_count;
36          gint   advance;
37      gpointer   user_data;
38          gint   input_index[ALPHABETSIZE];
39          gint   output_index[ALPHABETSIZE];
40     GPtrArray  *member_list;
41        Submat  *submat;
42   CodonSubmat  *codon_submat;
43          gint (*score_func)(struct WordHood_Alphabet *wha,
44                             gchar *seq_a, gchar *seq_b);
45      gboolean (*is_valid_func)(struct WordHood_Alphabet *wha,
46                                gchar *seq);
47          gint (*index_func)(struct WordHood_Alphabet *wha, gchar *seq);
48 } WordHood_Alphabet;
49 /* Invalid input gets -1 from the index_func
50  * Output is possible for all of member_list
51  */
52 
53 WordHood_Alphabet *WordHood_Alphabet_create_from_Submat(
54                         gchar *input_alphabet,
55                         gchar *output_alphabet, Submat *submat,
56                         gboolean case_sensitive_input);
57 WordHood_Alphabet *WordHood_Alphabet_create_from_CodonSubmat(
58                     CodonSubmat *cs, gboolean case_sensitive_input);
59 WordHood_Alphabet *WordHood_Alphabet_share(WordHood_Alphabet *wha);
60 void WordHood_Alphabet_destroy(WordHood_Alphabet *wha);
61 
62 /* Using different input and output alphabets allows
63  * generation of a neighbourhood covering redundant symbols
64  *
65  */
66 
67 typedef gboolean (*WordHood_Traverse_Func)(gchar *word,
68                   gint score, gpointer user_data);
69 /* Return TRUE to stop the traversal */
70 
71 typedef struct {
72          WordHood_Alphabet *wha;
73                       gint  threshold;
74                   gboolean  use_dropoff;
75                      gchar *orig_word;
76                      gchar *curr_word;
77                       gint *depth_threshold;
78                       gint  word_pos;
79                       gint  curr_score;
80                       gint  curr_len;
81                       gint  alloc_len;
82 } WordHood;
83 
84 WordHood *WordHood_create(WordHood_Alphabet *wha,
85                           gint threshold, gboolean use_dropoff);
86 
87 /* When use_dropoff is FALSE:
88  *   - each word will have a score of at least the threshold
89  * When use_dropoff is TRUE:
90  *   - each word will have a score within the threshold
91  *     of the score given by comparison to itself
92  *
93  * Thus threshold=0, use_dropoff=TRUE will yield the minimum wordhood
94  * (ie. just valid words from the query).
95  */
96 
97 void WordHood_destroy(WordHood *wh);
98 void WordHood_info(WordHood *wh);
99 
100 void WordHood_traverse(WordHood *wh, WordHood_Traverse_Func whtf,
101                        gchar *word, gint len, gpointer user_data);
102 
103 #ifdef __cplusplus
104 }
105 #endif /* __cplusplus */
106 
107 #endif /* INCLUDED_WORDHOOD_H */
108 
109