1 /****************************************************************\
2 *                                                                *
3 *  Interface for different types of alignment model              *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #include "modeltype.h"
17 
18 #include "ungapped.h"
19 #include "affine.h"
20 #include "est2genome.h"
21 #include "ner.h"
22 #include "protein2dna.h"
23 #include "protein2genome.h"
24 #include "coding2coding.h"
25 #include "coding2genome.h"
26 #include "cdna2genome.h"
27 #include "genome2genome.h"
28 
Model_Type_to_string(Model_Type type)29 gchar *Model_Type_to_string(Model_Type type){
30     register gchar *name = NULL;
31     switch(type){
32         case Model_Type_UNGAPPED:
33             name = "ungapped";
34             break;
35         case Model_Type_UNGAPPED_TRANS:
36             name = "ungapped:trans";
37             break;
38         case Model_Type_AFFINE_GLOBAL:
39             name = "affine:global";
40             break;
41         case Model_Type_AFFINE_BESTFIT:
42             name = "affine:bestfit";
43             break;
44         case Model_Type_AFFINE_LOCAL:
45             name = "affine:local";
46             break;
47         case Model_Type_AFFINE_OVERLAP:
48             name = "affine:overlap";
49             break;
50         case Model_Type_EST2GENOME:
51             name = "est2genome";
52             break;
53         case Model_Type_NER:
54             name = "ner";
55             break;
56         case Model_Type_PROTEIN2DNA:
57             name = "protein2dna";
58             break;
59         case Model_Type_PROTEIN2DNA_BESTFIT:
60             name = "protein2dna:bestfit";
61             break;
62         case Model_Type_PROTEIN2GENOME:
63             name = "protein2genome";
64             break;
65         case Model_Type_PROTEIN2GENOME_BESTFIT:
66             name = "protein2genome:bestfit";
67             break;
68         case Model_Type_CODING2CODING:
69             name = "coding2coding";
70             break;
71         case Model_Type_CODING2GENOME:
72             name = "coding2genome";
73             break;
74         case Model_Type_CDNA2GENOME:
75             name = "cdna2genome";
76             break;
77         case Model_Type_GENOME2GENOME:
78             name = "genome2genome";
79             break;
80         default:
81             g_error("Unknown Model Type [%d]", type);
82             break;
83         }
84     return name;
85     }
86 
Model_Type_from_string(gchar * str)87 Model_Type Model_Type_from_string(gchar *str){
88     gchar *name[Model_Type_TOTAL] = {
89          "ungapped", "ungapped:trans",
90          "affine:global", "affine:bestfit",
91          "affine:local", "affine:overlap",
92          "est2genome", "ner", "protein2dna", "protein2dna:bestfit",
93          "protein2genome", "protein2genome:bestfit",
94          "coding2coding", "coding2genome", "cdna2genome",
95          "genome2genome"};
96     gchar *short_name[Model_Type_TOTAL] = {
97          "u", "u:t",
98          "a:g", "a:b", "a:l", "a:o",
99          "e2g", "ner",
100          "p2d", "p2d:b", "p2g", "p2g:b",
101          "c2c", "c2g", "cd2g", "g2g"};
102     Model_Type type[Model_Type_TOTAL] = {
103           Model_Type_UNGAPPED,
104           Model_Type_UNGAPPED_TRANS,
105           Model_Type_AFFINE_GLOBAL,
106           Model_Type_AFFINE_BESTFIT,
107           Model_Type_AFFINE_LOCAL,
108           Model_Type_AFFINE_OVERLAP,
109           Model_Type_EST2GENOME,
110           Model_Type_NER,
111           Model_Type_PROTEIN2DNA,
112           Model_Type_PROTEIN2DNA_BESTFIT,
113           Model_Type_PROTEIN2GENOME,
114           Model_Type_PROTEIN2GENOME_BESTFIT,
115           Model_Type_CODING2CODING,
116           Model_Type_CODING2GENOME,
117           Model_Type_CDNA2GENOME,
118           Model_Type_GENOME2GENOME};
119     register gint i;
120     for(i = 0; i < Model_Type_TOTAL; i++)
121         if(!g_strcasecmp(name[i], str))
122             return type[i];
123     for(i = 0; i < Model_Type_TOTAL; i++)
124         if(!g_strcasecmp(short_name[i], str))
125             return type[i];
126     g_error("Unknown model type [%s]", str);
127     return Model_Type_UNGAPPED; /* Not reached */
128     }
129 
Model_Type_is_gapped(Model_Type type)130 gboolean Model_Type_is_gapped(Model_Type type){
131     if((type == Model_Type_UNGAPPED)
132     || (type == Model_Type_UNGAPPED_TRANS))
133         return FALSE;
134     return TRUE;
135     }
136 
Model_Type_translate_both(Model_Type type)137 gboolean Model_Type_translate_both(Model_Type type){
138     if((type == Model_Type_UNGAPPED_TRANS)
139     || (type == Model_Type_CODING2CODING)
140     || (type == Model_Type_CODING2GENOME)
141     || (type == Model_Type_CDNA2GENOME)
142     || (type == Model_Type_GENOME2GENOME))
143         return TRUE;
144     return FALSE;
145     }
146 
Model_Type_has_dual_match(Model_Type type)147 gboolean Model_Type_has_dual_match(Model_Type type){
148     if((type == Model_Type_CDNA2GENOME)
149     || (type == Model_Type_GENOME2GENOME))
150         return TRUE;
151     return FALSE;
152     }
153 
Model_Type_has_genomic_target(Model_Type type)154 gboolean Model_Type_has_genomic_target(Model_Type type){
155     if((type == Model_Type_EST2GENOME)
156     || (type == Model_Type_PROTEIN2GENOME)
157     || (type == Model_Type_PROTEIN2GENOME_BESTFIT)
158     || (type == Model_Type_CODING2GENOME)
159     || (type == Model_Type_CDNA2GENOME)
160     || (type == Model_Type_GENOME2GENOME))
161         return TRUE;
162     return FALSE;
163     }
164 
/* Model_Type_check_input:
 * Check that the query and target alphabets are acceptable for
 * the requested model, reporting any mismatch through g_error().
 *
 *   type        : model being requested
 *   query_type  : alphabet type of the query
 *   target_type : alphabet type of the target
 *
 * Model_Type_UNGAPPED places no restriction on either alphabet here.
 */
static void Model_Type_check_input(Model_Type type,
                                   Alphabet_Type query_type,
                                   Alphabet_Type target_type){
    switch(type){
        case Model_Type_UNGAPPED:
            break;
        case Model_Type_UNGAPPED_TRANS:
        case Model_Type_EST2GENOME:
        case Model_Type_CODING2CODING:
        case Model_Type_CODING2GENOME:
        case Model_Type_CDNA2GENOME:
        case Model_Type_GENOME2GENOME:
            /* qy == NT, tg == NT */
            if(query_type != Alphabet_Type_DNA){
                g_error("Expected DNA query (not %s) for model [%s]",
                        Alphabet_Type_get_name(query_type),
                        Model_Type_to_string(type));
                }
            if(target_type != Alphabet_Type_DNA){
                g_error("Expected DNA target (not %s) for model [%s]",
                        Alphabet_Type_get_name(target_type),
                        Model_Type_to_string(type));
                }
            break;
        case Model_Type_AFFINE_GLOBAL:
        case Model_Type_AFFINE_BESTFIT:
        case Model_Type_AFFINE_LOCAL:
        case Model_Type_AFFINE_OVERLAP:
        case Model_Type_NER:
            /* qy == tg, and the type must be known */
            if(query_type != target_type){
                g_error("Expected similar sequence types for model"
                        " [%s] (not %s:%s)",
                        Model_Type_to_string(type),
                        Alphabet_Type_get_name(query_type),
                        Alphabet_Type_get_name(target_type));
                }
            if(query_type == Alphabet_Type_UNKNOWN){
                g_error("Model [%s] cannot use unknown sequence type",
                        Model_Type_to_string(type));
                }
            break;
        case Model_Type_PROTEIN2DNA:
        case Model_Type_PROTEIN2DNA_BESTFIT:
        case Model_Type_PROTEIN2GENOME:
        case Model_Type_PROTEIN2GENOME_BESTFIT:
            /* qy == AA, tg == NT */
            if(query_type != Alphabet_Type_PROTEIN){
                g_error(
                    "Expected protein query (not %s) for model [%s]",
                        Alphabet_Type_get_name(query_type),
                        Model_Type_to_string(type));
                }
            if(target_type != Alphabet_Type_DNA){
                g_error("Expected DNA target (not %s) for model [%s]",
                        Alphabet_Type_get_name(target_type),
                        Model_Type_to_string(type));
                }
            break;
        default:
            /* Model_Type_to_string() cannot name an unknown type
             * (it raises its own error first), so print the raw
             * value, as the other functions in this file do.
             */
            g_error("Unknown Model Type [%d]", (gint)type);
            break;
        }
    return;
    }
223 
Model_Type_get_model(Model_Type type,Alphabet_Type query_type,Alphabet_Type target_type)224 C4_Model *Model_Type_get_model(Model_Type type,
225                                Alphabet_Type query_type,
226                                Alphabet_Type target_type){
227     register C4_Model *model = NULL;
228     register Match_Type match_type;
229     Model_Type_check_input(type, query_type, target_type);
230     switch(type){
231         case Model_Type_UNGAPPED:
232             match_type = Match_Type_find(query_type, target_type,
233                                          FALSE);
234             model = Ungapped_create(match_type);
235             break;
236         case Model_Type_UNGAPPED_TRANS:
237             match_type = Match_Type_find(query_type, target_type,
238                                          TRUE);
239             model = Ungapped_create(match_type);
240             break;
241         case Model_Type_AFFINE_GLOBAL:
242             model = Affine_create(Affine_Model_Type_GLOBAL,
243                                   query_type, target_type, FALSE);
244             break;
245         case Model_Type_AFFINE_BESTFIT:
246             model = Affine_create(Affine_Model_Type_BESTFIT,
247                                   query_type, target_type, FALSE);
248             break;
249         case Model_Type_AFFINE_LOCAL:
250             model = Affine_create(Affine_Model_Type_LOCAL,
251                                   query_type, target_type, FALSE);
252             break;
253         case Model_Type_AFFINE_OVERLAP:
254             model = Affine_create(Affine_Model_Type_OVERLAP,
255                                   query_type, target_type, FALSE);
256             break;
257         case Model_Type_EST2GENOME:
258             model = EST2Genome_create();
259             break;
260         case Model_Type_NER:
261             model = NER_create(query_type, target_type);
262             break;
263         case Model_Type_PROTEIN2DNA:
264             model = Protein2DNA_create(Affine_Model_Type_LOCAL);
265             break;
266         case Model_Type_PROTEIN2DNA_BESTFIT:
267             model = Protein2DNA_create(Affine_Model_Type_BESTFIT);
268             break;
269         case Model_Type_PROTEIN2GENOME:
270             model = Protein2Genome_create(Affine_Model_Type_LOCAL);
271             break;
272         case Model_Type_PROTEIN2GENOME_BESTFIT:
273             model = Protein2Genome_create(Affine_Model_Type_BESTFIT);
274             break;
275         case Model_Type_CODING2CODING:
276             model = Coding2Coding_create();
277             break;
278         case Model_Type_CODING2GENOME:
279             model = Coding2Genome_create();
280             break;
281         case Model_Type_CDNA2GENOME:
282             model = CDNA2Genome_create();
283             break;
284         case Model_Type_GENOME2GENOME:
285             model = Genome2Genome_create();
286             break;
287         default:
288             g_error("Unknown Model Type [%d]", type);
289             break;
290         }
291     return model;
292     }
293 
Model_Type_create_data(Model_Type type,Sequence * query,Sequence * target)294 gpointer Model_Type_create_data(Model_Type type,
295                                 Sequence *query, Sequence *target){
296     register gpointer model_data = NULL;
297     register Match_Type match_type;
298     switch(type){
299         case Model_Type_UNGAPPED:
300             match_type = Match_Type_find(query->alphabet->type,
301                                          target->alphabet->type,
302                                          FALSE);
303             model_data = Ungapped_Data_create(query, target,
304                                               match_type);
305             break;
306         case Model_Type_UNGAPPED_TRANS:
307             match_type = Match_Type_find(query->alphabet->type,
308                                          target->alphabet->type,
309                                          TRUE);
310             model_data = Ungapped_Data_create(query, target,
311                                               match_type);
312             break;
313         case Model_Type_AFFINE_GLOBAL:
314             /*fallthrough*/
315         case Model_Type_AFFINE_BESTFIT:
316             /*fallthrough*/
317         case Model_Type_AFFINE_LOCAL:
318             /*fallthrough*/
319         case Model_Type_AFFINE_OVERLAP:
320             model_data = Affine_Data_create(query, target, FALSE);
321             break;
322         case Model_Type_EST2GENOME:
323             model_data = EST2Genome_Data_create(query, target);
324             break;
325         case Model_Type_NER:
326             model_data = NER_Data_create(query, target);
327             break;
328         case Model_Type_PROTEIN2DNA:
329             /*fallthrough*/
330         case Model_Type_PROTEIN2DNA_BESTFIT:
331             model_data = Protein2DNA_Data_create(query, target);
332             break;
333         case Model_Type_PROTEIN2GENOME:
334             /*fallthrough*/
335         case Model_Type_PROTEIN2GENOME_BESTFIT:
336             model_data = Protein2Genome_Data_create(query, target);
337             break;
338         case Model_Type_CODING2CODING:
339             model_data = Coding2Coding_Data_create(query, target);
340             break;
341         case Model_Type_CODING2GENOME:
342             model_data = Coding2Genome_Data_create(query, target);
343             break;
344         case Model_Type_CDNA2GENOME:
345             model_data = CDNA2Genome_Data_create(query, target);
346             break;
347         case Model_Type_GENOME2GENOME:
348             model_data = Genome2Genome_Data_create(query, target);
349             break;
350         default:
351             g_error("Unknown Model Type [%d]", type);
352         }
353     return model_data;
354     }
355 
/* Model_Type_destroy_data:
 * Hand a model data object to the *_Data_destroy() function
 * that corresponds to its model type.
 *
 *   type       : model the data belongs to
 *   model_data : object to destroy
 *
 * An unknown type is reported through g_error().
 */
void Model_Type_destroy_data(Model_Type type, gpointer model_data){
    switch(type){
        case Model_Type_UNGAPPED:
        case Model_Type_UNGAPPED_TRANS:
            Ungapped_Data_destroy(model_data);
            return;
        case Model_Type_AFFINE_GLOBAL:
        case Model_Type_AFFINE_BESTFIT:
        case Model_Type_AFFINE_LOCAL:
        case Model_Type_AFFINE_OVERLAP:
            Affine_Data_destroy(model_data);
            return;
        case Model_Type_EST2GENOME:
            EST2Genome_Data_destroy(model_data);
            return;
        case Model_Type_NER:
            NER_Data_destroy(model_data);
            return;
        case Model_Type_PROTEIN2DNA:
        case Model_Type_PROTEIN2DNA_BESTFIT:
            Protein2DNA_Data_destroy(model_data);
            return;
        case Model_Type_PROTEIN2GENOME:
        case Model_Type_PROTEIN2GENOME_BESTFIT:
            Protein2Genome_Data_destroy(model_data);
            return;
        case Model_Type_CODING2CODING:
            Coding2Coding_Data_destroy(model_data);
            return;
        case Model_Type_CODING2GENOME:
            Coding2Genome_Data_destroy(model_data);
            return;
        case Model_Type_CDNA2GENOME:
            CDNA2Genome_Data_destroy(model_data);
            return;
        case Model_Type_GENOME2GENOME:
            Genome2Genome_Data_destroy(model_data);
            return;
        default:
            break;
        }
    /* Only reached when no case above matched */
    g_error("Unknown Model Type [%d]", type);
    return;
    }
405 
406