1 /****************************************************************\
2 * *
3 * Interface for different types of alignment model *
4 * *
5 * Guy St.C. Slater.. mailto:guy@ebi.ac.uk *
6 * Copyright (C) 2000-2009. All Rights Reserved. *
7 * *
8 * This source code is distributed under the terms of the *
9 * GNU General Public License, version 3. See the file COPYING *
10 * or http://www.gnu.org/licenses/gpl.txt for details *
11 * *
12 * If you use this code, please keep this notice intact. *
13 * *
14 \****************************************************************/
15
16 #include "modeltype.h"
17
18 #include "ungapped.h"
19 #include "affine.h"
20 #include "est2genome.h"
21 #include "ner.h"
22 #include "protein2dna.h"
23 #include "protein2genome.h"
24 #include "coding2coding.h"
25 #include "coding2genome.h"
26 #include "cdna2genome.h"
27 #include "genome2genome.h"
28
Model_Type_to_string(Model_Type type)29 gchar *Model_Type_to_string(Model_Type type){
30 register gchar *name = NULL;
31 switch(type){
32 case Model_Type_UNGAPPED:
33 name = "ungapped";
34 break;
35 case Model_Type_UNGAPPED_TRANS:
36 name = "ungapped:trans";
37 break;
38 case Model_Type_AFFINE_GLOBAL:
39 name = "affine:global";
40 break;
41 case Model_Type_AFFINE_BESTFIT:
42 name = "affine:bestfit";
43 break;
44 case Model_Type_AFFINE_LOCAL:
45 name = "affine:local";
46 break;
47 case Model_Type_AFFINE_OVERLAP:
48 name = "affine:overlap";
49 break;
50 case Model_Type_EST2GENOME:
51 name = "est2genome";
52 break;
53 case Model_Type_NER:
54 name = "ner";
55 break;
56 case Model_Type_PROTEIN2DNA:
57 name = "protein2dna";
58 break;
59 case Model_Type_PROTEIN2DNA_BESTFIT:
60 name = "protein2dna:bestfit";
61 break;
62 case Model_Type_PROTEIN2GENOME:
63 name = "protein2genome";
64 break;
65 case Model_Type_PROTEIN2GENOME_BESTFIT:
66 name = "protein2genome:bestfit";
67 break;
68 case Model_Type_CODING2CODING:
69 name = "coding2coding";
70 break;
71 case Model_Type_CODING2GENOME:
72 name = "coding2genome";
73 break;
74 case Model_Type_CDNA2GENOME:
75 name = "cdna2genome";
76 break;
77 case Model_Type_GENOME2GENOME:
78 name = "genome2genome";
79 break;
80 default:
81 g_error("Unknown Model Type [%d]", type);
82 break;
83 }
84 return name;
85 }
86
Model_Type_from_string(gchar * str)87 Model_Type Model_Type_from_string(gchar *str){
88 gchar *name[Model_Type_TOTAL] = {
89 "ungapped", "ungapped:trans",
90 "affine:global", "affine:bestfit",
91 "affine:local", "affine:overlap",
92 "est2genome", "ner", "protein2dna", "protein2dna:bestfit",
93 "protein2genome", "protein2genome:bestfit",
94 "coding2coding", "coding2genome", "cdna2genome",
95 "genome2genome"};
96 gchar *short_name[Model_Type_TOTAL] = {
97 "u", "u:t",
98 "a:g", "a:b", "a:l", "a:o",
99 "e2g", "ner",
100 "p2d", "p2d:b", "p2g", "p2g:b",
101 "c2c", "c2g", "cd2g", "g2g"};
102 Model_Type type[Model_Type_TOTAL] = {
103 Model_Type_UNGAPPED,
104 Model_Type_UNGAPPED_TRANS,
105 Model_Type_AFFINE_GLOBAL,
106 Model_Type_AFFINE_BESTFIT,
107 Model_Type_AFFINE_LOCAL,
108 Model_Type_AFFINE_OVERLAP,
109 Model_Type_EST2GENOME,
110 Model_Type_NER,
111 Model_Type_PROTEIN2DNA,
112 Model_Type_PROTEIN2DNA_BESTFIT,
113 Model_Type_PROTEIN2GENOME,
114 Model_Type_PROTEIN2GENOME_BESTFIT,
115 Model_Type_CODING2CODING,
116 Model_Type_CODING2GENOME,
117 Model_Type_CDNA2GENOME,
118 Model_Type_GENOME2GENOME};
119 register gint i;
120 for(i = 0; i < Model_Type_TOTAL; i++)
121 if(!g_strcasecmp(name[i], str))
122 return type[i];
123 for(i = 0; i < Model_Type_TOTAL; i++)
124 if(!g_strcasecmp(short_name[i], str))
125 return type[i];
126 g_error("Unknown model type [%s]", str);
127 return Model_Type_UNGAPPED; /* Not reached */
128 }
129
Model_Type_is_gapped(Model_Type type)130 gboolean Model_Type_is_gapped(Model_Type type){
131 if((type == Model_Type_UNGAPPED)
132 || (type == Model_Type_UNGAPPED_TRANS))
133 return FALSE;
134 return TRUE;
135 }
136
Model_Type_translate_both(Model_Type type)137 gboolean Model_Type_translate_both(Model_Type type){
138 if((type == Model_Type_UNGAPPED_TRANS)
139 || (type == Model_Type_CODING2CODING)
140 || (type == Model_Type_CODING2GENOME)
141 || (type == Model_Type_CDNA2GENOME)
142 || (type == Model_Type_GENOME2GENOME))
143 return TRUE;
144 return FALSE;
145 }
146
Model_Type_has_dual_match(Model_Type type)147 gboolean Model_Type_has_dual_match(Model_Type type){
148 if((type == Model_Type_CDNA2GENOME)
149 || (type == Model_Type_GENOME2GENOME))
150 return TRUE;
151 return FALSE;
152 }
153
Model_Type_has_genomic_target(Model_Type type)154 gboolean Model_Type_has_genomic_target(Model_Type type){
155 if((type == Model_Type_EST2GENOME)
156 || (type == Model_Type_PROTEIN2GENOME)
157 || (type == Model_Type_PROTEIN2GENOME_BESTFIT)
158 || (type == Model_Type_CODING2GENOME)
159 || (type == Model_Type_CDNA2GENOME)
160 || (type == Model_Type_GENOME2GENOME))
161 return TRUE;
162 return FALSE;
163 }
164
/* Check that the query and target alphabets are usable with the
 * requested model, reporting any problem through g_error().
 *
 *   type        - model about to be used
 *   query_type  - alphabet of the query sequence
 *   target_type - alphabet of the target sequence
 *
 * Model_Type_UNGAPPED accepts any pairing of alphabets.
 *
 * An unrecognised type is reported by number: for such a value
 * Model_Type_to_string() raises its own error, so it cannot be
 * used to name the type in the message here.
 */
static void Model_Type_check_input(Model_Type type,
                                   Alphabet_Type query_type,
                                   Alphabet_Type target_type){
    switch(type){
        case Model_Type_UNGAPPED:
            break;
        case Model_Type_UNGAPPED_TRANS:
        case Model_Type_EST2GENOME:
        case Model_Type_CODING2CODING:
        case Model_Type_CODING2GENOME:
        case Model_Type_CDNA2GENOME:
        case Model_Type_GENOME2GENOME:
            /* qy == NT, tg == NT */
            if(query_type != Alphabet_Type_DNA){
                g_error("Expected DNA query (not %s) for model [%s]",
                        Alphabet_Type_get_name(query_type),
                        Model_Type_to_string(type));
            }
            if(target_type != Alphabet_Type_DNA){
                g_error("Expected DNA target (not %s) for model [%s]",
                        Alphabet_Type_get_name(target_type),
                        Model_Type_to_string(type));
            }
            break;
        case Model_Type_AFFINE_GLOBAL:
        case Model_Type_AFFINE_BESTFIT:
        case Model_Type_AFFINE_LOCAL:
        case Model_Type_AFFINE_OVERLAP:
        case Model_Type_NER:
            /* Both sequences must share one known alphabet. */
            if(query_type != target_type){
                g_error("Expected similar sequence types for model"
                        " [%s] (not %s:%s)",
                        Model_Type_to_string(type),
                        Alphabet_Type_get_name(query_type),
                        Alphabet_Type_get_name(target_type));
            }
            if(query_type == Alphabet_Type_UNKNOWN){
                g_error("Model [%s] cannot use unknown sequence type",
                        Model_Type_to_string(type));
            }
            break;
        case Model_Type_PROTEIN2DNA:
        case Model_Type_PROTEIN2DNA_BESTFIT:
        case Model_Type_PROTEIN2GENOME:
        case Model_Type_PROTEIN2GENOME_BESTFIT:
            /* qy == AA, tg == NT */
            if(query_type != Alphabet_Type_PROTEIN){
                g_error("Expected protein query (not %s) for model [%s]",
                        Alphabet_Type_get_name(query_type),
                        Model_Type_to_string(type));
            }
            if(target_type != Alphabet_Type_DNA){
                g_error("Expected DNA target (not %s) for model [%s]",
                        Alphabet_Type_get_name(target_type),
                        Model_Type_to_string(type));
            }
            break;
        default:
            g_error("Unknown Model Type [%d]", (gint)type);
            break;
    }
    return;
}
223
Model_Type_get_model(Model_Type type,Alphabet_Type query_type,Alphabet_Type target_type)224 C4_Model *Model_Type_get_model(Model_Type type,
225 Alphabet_Type query_type,
226 Alphabet_Type target_type){
227 register C4_Model *model = NULL;
228 register Match_Type match_type;
229 Model_Type_check_input(type, query_type, target_type);
230 switch(type){
231 case Model_Type_UNGAPPED:
232 match_type = Match_Type_find(query_type, target_type,
233 FALSE);
234 model = Ungapped_create(match_type);
235 break;
236 case Model_Type_UNGAPPED_TRANS:
237 match_type = Match_Type_find(query_type, target_type,
238 TRUE);
239 model = Ungapped_create(match_type);
240 break;
241 case Model_Type_AFFINE_GLOBAL:
242 model = Affine_create(Affine_Model_Type_GLOBAL,
243 query_type, target_type, FALSE);
244 break;
245 case Model_Type_AFFINE_BESTFIT:
246 model = Affine_create(Affine_Model_Type_BESTFIT,
247 query_type, target_type, FALSE);
248 break;
249 case Model_Type_AFFINE_LOCAL:
250 model = Affine_create(Affine_Model_Type_LOCAL,
251 query_type, target_type, FALSE);
252 break;
253 case Model_Type_AFFINE_OVERLAP:
254 model = Affine_create(Affine_Model_Type_OVERLAP,
255 query_type, target_type, FALSE);
256 break;
257 case Model_Type_EST2GENOME:
258 model = EST2Genome_create();
259 break;
260 case Model_Type_NER:
261 model = NER_create(query_type, target_type);
262 break;
263 case Model_Type_PROTEIN2DNA:
264 model = Protein2DNA_create(Affine_Model_Type_LOCAL);
265 break;
266 case Model_Type_PROTEIN2DNA_BESTFIT:
267 model = Protein2DNA_create(Affine_Model_Type_BESTFIT);
268 break;
269 case Model_Type_PROTEIN2GENOME:
270 model = Protein2Genome_create(Affine_Model_Type_LOCAL);
271 break;
272 case Model_Type_PROTEIN2GENOME_BESTFIT:
273 model = Protein2Genome_create(Affine_Model_Type_BESTFIT);
274 break;
275 case Model_Type_CODING2CODING:
276 model = Coding2Coding_create();
277 break;
278 case Model_Type_CODING2GENOME:
279 model = Coding2Genome_create();
280 break;
281 case Model_Type_CDNA2GENOME:
282 model = CDNA2Genome_create();
283 break;
284 case Model_Type_GENOME2GENOME:
285 model = Genome2Genome_create();
286 break;
287 default:
288 g_error("Unknown Model Type [%d]", type);
289 break;
290 }
291 return model;
292 }
293
Model_Type_create_data(Model_Type type,Sequence * query,Sequence * target)294 gpointer Model_Type_create_data(Model_Type type,
295 Sequence *query, Sequence *target){
296 register gpointer model_data = NULL;
297 register Match_Type match_type;
298 switch(type){
299 case Model_Type_UNGAPPED:
300 match_type = Match_Type_find(query->alphabet->type,
301 target->alphabet->type,
302 FALSE);
303 model_data = Ungapped_Data_create(query, target,
304 match_type);
305 break;
306 case Model_Type_UNGAPPED_TRANS:
307 match_type = Match_Type_find(query->alphabet->type,
308 target->alphabet->type,
309 TRUE);
310 model_data = Ungapped_Data_create(query, target,
311 match_type);
312 break;
313 case Model_Type_AFFINE_GLOBAL:
314 /*fallthrough*/
315 case Model_Type_AFFINE_BESTFIT:
316 /*fallthrough*/
317 case Model_Type_AFFINE_LOCAL:
318 /*fallthrough*/
319 case Model_Type_AFFINE_OVERLAP:
320 model_data = Affine_Data_create(query, target, FALSE);
321 break;
322 case Model_Type_EST2GENOME:
323 model_data = EST2Genome_Data_create(query, target);
324 break;
325 case Model_Type_NER:
326 model_data = NER_Data_create(query, target);
327 break;
328 case Model_Type_PROTEIN2DNA:
329 /*fallthrough*/
330 case Model_Type_PROTEIN2DNA_BESTFIT:
331 model_data = Protein2DNA_Data_create(query, target);
332 break;
333 case Model_Type_PROTEIN2GENOME:
334 /*fallthrough*/
335 case Model_Type_PROTEIN2GENOME_BESTFIT:
336 model_data = Protein2Genome_Data_create(query, target);
337 break;
338 case Model_Type_CODING2CODING:
339 model_data = Coding2Coding_Data_create(query, target);
340 break;
341 case Model_Type_CODING2GENOME:
342 model_data = Coding2Genome_Data_create(query, target);
343 break;
344 case Model_Type_CDNA2GENOME:
345 model_data = CDNA2Genome_Data_create(query, target);
346 break;
347 case Model_Type_GENOME2GENOME:
348 model_data = Genome2Genome_Data_create(query, target);
349 break;
350 default:
351 g_error("Unknown Model Type [%d]", type);
352 }
353 return model_data;
354 }
355
/* Release a data block obtained from Model_Type_create_data().
 *
 *   type       - model type the data was created for; selects
 *                which *_Data_destroy() function is called
 *   model_data - the data block to release
 *
 * An unrecognised type is reported through g_error().
 */
void Model_Type_destroy_data(Model_Type type, gpointer model_data){
    switch(type){
        case Model_Type_UNGAPPED:
        case Model_Type_UNGAPPED_TRANS:
            Ungapped_Data_destroy(model_data);
            return;
        case Model_Type_AFFINE_GLOBAL:
        case Model_Type_AFFINE_BESTFIT:
        case Model_Type_AFFINE_LOCAL:
        case Model_Type_AFFINE_OVERLAP:
            Affine_Data_destroy(model_data);
            return;
        case Model_Type_EST2GENOME:
            EST2Genome_Data_destroy(model_data);
            return;
        case Model_Type_NER:
            NER_Data_destroy(model_data);
            return;
        case Model_Type_PROTEIN2DNA:
        case Model_Type_PROTEIN2DNA_BESTFIT:
            Protein2DNA_Data_destroy(model_data);
            return;
        case Model_Type_PROTEIN2GENOME:
        case Model_Type_PROTEIN2GENOME_BESTFIT:
            Protein2Genome_Data_destroy(model_data);
            return;
        case Model_Type_CODING2CODING:
            Coding2Coding_Data_destroy(model_data);
            return;
        case Model_Type_CODING2GENOME:
            Coding2Genome_Data_destroy(model_data);
            return;
        case Model_Type_CDNA2GENOME:
            CDNA2Genome_Data_destroy(model_data);
            return;
        case Model_Type_GENOME2GENOME:
            Genome2Genome_Data_destroy(model_data);
            return;
        default:
            break;
    }
    /* Only an unrecognised type gets this far. */
    g_error("Unknown Model Type [%d]", type);
}
405
406