1 /****************************************************************\
2 * *
3 * C4 dynamic programming library - alignment code *
4 * *
5 * Guy St.C. Slater.. mailto:guy@ebi.ac.uk *
6 * Copyright (C) 2000-2009. All Rights Reserved. *
7 * *
8 * This source code is distributed under the terms of the *
9 * GNU General Public License, version 3. See the file COPYING *
10 * or http://www.gnu.org/licenses/gpl.txt for details *
11 * *
12 * If you use this code, please keep this notice intact. *
13 * *
14 \****************************************************************/
15
16 #include <string.h> /* For strlen() */
17 #include <ctype.h> /* For tolower() */
18 #include <time.h> /* For time() */
19
20 #include "match.h"
21 #include "alignment.h"
22
Alignment_ArgumentSet_create(Argument * arg)23 Alignment_ArgumentSet *Alignment_ArgumentSet_create(Argument *arg){
24 register ArgumentSet *as;
25 static Alignment_ArgumentSet aas = {80, TRUE};
26 if(arg){
27 as = ArgumentSet_create("Alignment options");
28 ArgumentSet_add_option(as, '\0', "alignmentwidth", NULL,
29 "Alignment display width", "80", Argument_parse_int,
30 &aas.alignment_width);
31 ArgumentSet_add_option(as, '\0', "forwardcoordinates", NULL,
32 "Report all coordinates on the forward strand", "TRUE",
33 Argument_parse_boolean, &aas.forward_strand_coords);
34 Argument_absorb_ArgumentSet(arg, as);
35 }
36 return &aas;
37 }
38
39 /**/
40
Alignment_create(C4_Model * model,Region * region,C4_Score score)41 Alignment *Alignment_create(C4_Model *model, Region *region,
42 C4_Score score){
43 register Alignment *alignment = g_new(Alignment, 1);
44 g_assert(model);
45 g_assert(region);
46 alignment->ref_count = 1;
47 alignment->region = Region_share(region);
48 alignment->score = score;
49 alignment->operation_list = g_ptr_array_new();
50 alignment->model = C4_Model_share(model);
51 return alignment;
52 }
53
Alignment_share(Alignment * alignment)54 Alignment *Alignment_share(Alignment *alignment){
55 g_assert(alignment);
56 alignment->ref_count++;
57 return alignment;
58 }
59
Alignment_destroy(Alignment * alignment)60 void Alignment_destroy(Alignment *alignment){
61 register gint i;
62 g_assert(alignment);
63 if(--alignment->ref_count)
64 return;
65 for(i = 0; i < alignment->operation_list->len; i++)
66 g_free(alignment->operation_list->pdata[i]);
67 g_ptr_array_free(alignment->operation_list, TRUE);
68 Region_destroy(alignment->region);
69 C4_Model_destroy(alignment->model);
70 g_free(alignment);
71 return;
72 }
73
Alignment_add(Alignment * alignment,C4_Transition * transition,gint length)74 void Alignment_add(Alignment *alignment, C4_Transition *transition,
75 gint length){
76 register AlignmentOperation *alignment_operation, *prev;
77 g_assert(alignment);
78 g_assert(transition);
79 if(alignment->operation_list->len){
80 prev = alignment->operation_list->pdata
81 [alignment->operation_list->len-1];
82 if(prev->transition == transition){
83 prev->length += length;
84 g_assert(prev->length >= 0); /* Cannot have -ve total */
85 if(prev->length == 0){ /* Drop the transition */
86 g_free(prev);
87 g_ptr_array_set_size(alignment->operation_list,
88 alignment->operation_list->len-1);
89 }
90 return;
91 }
92 g_assert(prev->transition->output == transition->input);
93 }
94 alignment_operation = g_new(AlignmentOperation, 1);
95 alignment_operation->transition = transition;
96 alignment_operation->length = length;
97 g_ptr_array_add(alignment->operation_list, alignment_operation);
98 return;
99 }
100
101 /**/
102
Alignment_match_get_symbol(Sequence * seq,gint pos,gint advance,Translate * translate)103 static gchar Alignment_match_get_symbol(Sequence *seq,
104 gint pos, gint advance,
105 Translate *translate){
106 register gchar symbol = '\0';
107 switch(advance){
108 case 1:
109 symbol = Sequence_get_symbol(seq, pos);
110 break;
111 case 3:
112 g_assert(translate);
113 symbol = Translate_base(translate,
114 Sequence_get_symbol(seq, pos),
115 Sequence_get_symbol(seq, pos+1),
116 Sequence_get_symbol(seq, pos+2));
117 break;
118 default:
119 g_error("Cannot process match advance of [%d]", advance);
120 break;
121 }
122 return symbol;
123 }
124
Alignment_match_get_string(Sequence * seq,gint pos,gint advance,gint max,Translate * translate)125 static gchar *Alignment_match_get_string(Sequence *seq,
126 gint pos, gint advance, gint max,
127 Translate *translate){
128 register gint ch;
129 switch(max){
130 case 1:
131 g_assert(advance == 1);
132 return g_strdup_printf("%c", Sequence_get_symbol(seq, pos));
133 break;
134 case 3:
135 switch(advance){
136 case 1:
137 g_assert(seq->alphabet->type == Alphabet_Type_PROTEIN);
138 ch = Sequence_get_symbol(seq, pos);
139 return g_strdup(Alphabet_aa2tla(ch));
140 break;
141 case 3:
142 g_assert(seq->alphabet->type == Alphabet_Type_DNA);
143 return g_strdup_printf("%c%c%c", /* codon */
144 Sequence_get_symbol(seq, pos),
145 Sequence_get_symbol(seq, pos+1),
146 Sequence_get_symbol(seq, pos+2));
147 break;
148 default:
149 g_error("Cannot handle advance [%d]", advance);
150 break;
151 }
152 break;
153 default:
154 g_error("Cannot find match display for max advance of [%d]",
155 advance);
156 break;
157 }
158 return NULL;
159 }
160
161 /**/
162
Alignment_get_gene_orientation(Alignment * alignment)163 static gchar Alignment_get_gene_orientation(Alignment *alignment){
164 register gint i;
165 register AlignmentOperation *ao;
166 for(i = 0; i < alignment->operation_list->len; i++){
167 ao = alignment->operation_list->pdata[i];
168 if(ao->transition->label == C4_Label_5SS)
169 return '+';
170 if(ao->transition->label == C4_Label_3SS)
171 return '-';
172 }
173 return '.';
174 }
175
Alignment_get_coordinate(Alignment * alignment,Sequence * query,Sequence * target,gboolean on_query,gboolean report_start)176 static gint Alignment_get_coordinate(Alignment *alignment,
177 Sequence *query,
178 Sequence *target,
179 gboolean on_query,
180 gboolean report_start){
181 register gint pos;
182 register Alignment_ArgumentSet *aas
183 = Alignment_ArgumentSet_create(NULL);
184 if(on_query){
185 if(report_start){
186 pos = alignment->region->query_start;
187 } else {
188 pos = Region_query_end(alignment->region);
189 }
190 if(aas->forward_strand_coords
191 && (query->strand == Sequence_Strand_REVCOMP)){
192 pos = query->len - pos;
193 }
194 } else { /* on_target */
195 if(report_start){
196 pos = alignment->region->target_start;
197 } else {
198 pos = Region_target_end(alignment->region);
199 }
200 if(aas->forward_strand_coords
201 && (target->strand == Sequence_Strand_REVCOMP)){
202 pos = target->len - pos;
203 }
204 }
205 return pos;
206 }
207
Alignment_convert_coordinate(Alignment * alignment,Sequence * query,Sequence * target,gint query_pos,gint target_pos,gboolean on_query)208 static gint Alignment_convert_coordinate(Alignment *alignment,
209 Sequence *query,
210 Sequence *target,
211 gint query_pos,
212 gint target_pos,
213 gboolean on_query){
214 register gint pos;
215 register Alignment_ArgumentSet *aas
216 = Alignment_ArgumentSet_create(NULL);
217 if(on_query){
218 pos = query_pos;
219 if(aas->forward_strand_coords
220 && (query->strand == Sequence_Strand_REVCOMP)){
221 pos = query->len - pos;
222 }
223 } else { /* on_target */
224 pos = target_pos;
225 if(aas->forward_strand_coords
226 && (target->strand == Sequence_Strand_REVCOMP)){
227 pos = target->len - pos;
228 }
229 }
230 return pos;
231 }
232
Alignment_get_max_pos_len(Alignment * alignment,Sequence * query,Sequence * target)233 static gint Alignment_get_max_pos_len(Alignment *alignment,
234 Sequence *query, Sequence *target){
235 register gint qmax = MAX(
236 Alignment_get_coordinate(alignment, query, target, TRUE, TRUE),
237 Alignment_get_coordinate(alignment, query, target, TRUE, FALSE));
238 register gint tmax = MAX(
239 Alignment_get_coordinate(alignment, query, target, FALSE, TRUE),
240 Alignment_get_coordinate(alignment, query, target, FALSE, FALSE));
241 register gint max = MAX(qmax, tmax);
242 register gchar *tmpstr = g_strdup_printf("%d", max);
243 register gint maxlen = strlen(tmpstr);
244 g_free(tmpstr);
245 return maxlen;
246 }
247
248 /**/
249
250 typedef struct {
251 gint query_pos;
252 gint target_pos;
253 } AlignmentPosition;
254
255 typedef struct {
256 gint query_separation;
257 gint target_separation;
258 } AlignmentSeparation;
259
260 typedef struct {
261 GString *outer_query;
262 GString *inner_query;
263 GString *middle;
264 GString *inner_target;
265 GString *outer_target;
266 GPtrArray *row_marker; /* List containing AlignmentPosition */
267 gint max_pos_len;
268 gint width;
269 gint limit;
270 gint query_intron_count;
271 gint target_intron_count;
272 gint joint_intron_count;
273 gint intron_advance_query;
274 gint intron_advance_target;
275 gchar gene_orientation;
276 gint ner_count;
277 gint ner_advance_query;
278 gint ner_advance_target;
279 gint curr_split_codon_count; /* Add 2 for each split codon */
280 GPtrArray *split_codon_separation_list;
281 /* List containing AlignmentSeparation */
282 } AlignmentView;
283
AlignmentView_create(Alignment * alignment,Sequence * query,Sequence * target)284 static AlignmentView *AlignmentView_create(Alignment *alignment,
285 Sequence *query,
286 Sequence *target){
287 register AlignmentView *av = g_new(AlignmentView, 1);
288 register AlignmentOperation *ao;
289 register AlignmentSeparation *curr_split_codon_separation = NULL;
290 register gint i;
291 register Alignment_ArgumentSet *aas
292 = Alignment_ArgumentSet_create(NULL);
293 av->outer_query = g_string_sized_new(16);
294 if(alignment->model->max_query_advance == 3)
295 av->inner_query = g_string_sized_new(16);
296 else
297 av->inner_query = NULL;
298 av->middle = g_string_sized_new(16);
299 if(alignment->model->max_target_advance == 3)
300 av->inner_target = g_string_sized_new(16);
301 else
302 av->inner_target = NULL;
303 av->outer_target = g_string_sized_new(16);
304 av->row_marker = g_ptr_array_new();
305 av->max_pos_len = Alignment_get_max_pos_len(alignment,
306 query, target);
307 av->width = aas->alignment_width-((av->max_pos_len + 5) << 1);
308 g_assert(av->width > 0);
309 av->limit = av->width;
310 av->query_intron_count = 0;
311 av->target_intron_count = 0;
312 av->joint_intron_count = 0;
313 av->intron_advance_query = 0;
314 av->intron_advance_target = 0;
315 av->gene_orientation = Alignment_get_gene_orientation(alignment);
316 av->ner_count = 0;
317 av->ner_advance_query = 0;
318 av->ner_advance_target = 0;
319 av->curr_split_codon_count = 0;
320 av->split_codon_separation_list = g_ptr_array_new();
321 for(i = 0; i < alignment->operation_list->len; i++){
322 ao = alignment->operation_list->pdata[i];
323 if(curr_split_codon_separation){
324 if(ao->transition->label == C4_Label_SPLIT_CODON){
325 g_ptr_array_add(av->split_codon_separation_list,
326 curr_split_codon_separation);
327 curr_split_codon_separation = NULL;
328 } else {
329 curr_split_codon_separation->query_separation
330 += (ao->length * ao->transition->advance_query);
331 curr_split_codon_separation->target_separation
332 += (ao->length * ao->transition->advance_target);
333 }
334 } else {
335 if(ao->transition->label == C4_Label_SPLIT_CODON){
336 curr_split_codon_separation
337 = g_new(AlignmentSeparation, 1);
338 curr_split_codon_separation->query_separation
339 = (ao->length * ao->transition->advance_query);
340 curr_split_codon_separation->target_separation
341 = (ao->length * ao->transition->advance_target);
342 }
343 }
344 }
345 /* Check we have an even number of split codons */
346 g_assert(!curr_split_codon_separation);
347 return av;
348 }
349
AlignmentView_destroy(AlignmentView * av)350 static void AlignmentView_destroy(AlignmentView *av){
351 register gint i;
352 for(i = 0; i < av->split_codon_separation_list->len; i++)
353 g_free(av->split_codon_separation_list->pdata[i]);
354 g_ptr_array_free(av->split_codon_separation_list, TRUE);
355 g_string_free(av->outer_query, TRUE);
356 g_string_free(av->middle, TRUE);
357 g_string_free(av->outer_target, TRUE);
358
359 if(av->inner_query)
360 g_string_free(av->inner_query, TRUE);
361 if(av->inner_target)
362 g_string_free(av->inner_target, TRUE);
363
364 for(i = 0; i < av->row_marker->len; i++)
365 g_free(av->row_marker->pdata[i]);
366 g_ptr_array_free(av->row_marker, TRUE);
367 g_free(av);
368 return;
369 }
370
AlignmentView_add(AlignmentView * av,gchar * query_string,gchar * inner_query_string,gchar * match_string,gchar * inner_target_string,gchar * target_string,gint query_pos,gint target_pos)371 static void AlignmentView_add(AlignmentView *av,
372 gchar *query_string,
373 gchar *inner_query_string,
374 gchar *match_string,
375 gchar *inner_target_string,
376 gchar *target_string,
377 gint query_pos, gint target_pos){
378 register AlignmentPosition *apos;
379 register gint i;
380 g_assert(strlen(query_string) == strlen(match_string));
381 g_assert(strlen(match_string) == strlen(target_string));
382 if(av->inner_query){
383 if(inner_query_string){
384 g_assert(strlen(match_string)
385 == strlen(inner_query_string));
386 g_string_append(av->inner_query, inner_query_string);
387 } else {
388 for(i = strlen(match_string)-1; i >=0; i--)
389 g_string_append_c(av->inner_query, ' ');
390 }
391 }
392 if(av->inner_target){
393 if(inner_target_string){
394 g_assert(strlen(match_string)
395 == strlen(inner_target_string));
396 g_string_append(av->inner_target, inner_target_string);
397 } else {
398 for(i = strlen(match_string)-1; i >=0; i--)
399 g_string_append_c(av->inner_target, ' ');
400 }
401 }
402 g_string_append(av->outer_query, query_string);
403 g_string_append(av->middle, match_string);
404 g_string_append(av->outer_target, target_string);
405 if(av->outer_query->len >= av->limit){
406 apos = g_new(AlignmentPosition, 1);
407 apos->query_pos = query_pos;
408 apos->target_pos = target_pos;
409 g_ptr_array_add(av->row_marker, apos);
410 av->limit += av->width;
411 }
412 return;
413 }
414
415 typedef struct {
416 gchar *match_string;
417 gchar *codon;
418 } AlignmentMatchData;
419
Alignment_match_translate_reverse(gchar * dna,gint length,gpointer user_data)420 static void Alignment_match_translate_reverse(gchar *dna, gint length,
421 gpointer user_data){
422 register AlignmentMatchData *amd = user_data;
423 register gint i;
424 for(i = 0; i < 3; i++)
425 if(dna[i] == amd->codon[i])
426 amd->match_string[i] = '!';
427 return;
428 }
429
Alignment_get_equiv_symbol(gchar symbol_a,gchar symbol_b,Submat * submat)430 static gchar Alignment_get_equiv_symbol(gchar symbol_a, gchar symbol_b,
431 Submat *submat){
432 register gint score;
433 g_assert(symbol_a);
434 g_assert(symbol_b);
435 if(submat){
436 score = Submat_lookup(submat, symbol_a, symbol_b);
437 if(score == 0)
438 return '.';
439 if(score > 0){
440 if(toupper(symbol_a) == toupper(symbol_b))
441 return '|';
442 else
443 return ':';
444 }
445 } else {
446 if(symbol_a == symbol_b)
447 return '|';
448 }
449 return ' ';
450 }
451
Alignment_get_codon_match_string(gchar * codon,gchar aa,Submat * protein_submat,Translate * translate)452 static gchar *Alignment_get_codon_match_string(gchar *codon, gchar aa,
453 Submat *protein_submat, Translate *translate){
454 register gchar codon_aa = Translate_codon(translate, codon);
455 register gchar match_symbol;
456 register gchar *codon_match;
457 gchar aa_seq[2];
458 AlignmentMatchData amd;
459 g_assert(translate);
460 g_assert(protein_submat);
461 match_symbol = Alignment_get_equiv_symbol(codon_aa, aa,
462 protein_submat);
463 codon_match = g_strnfill(3, match_symbol);
464 if(match_symbol != '|'){
465 amd.match_string = codon_match;
466 amd.codon = codon;
467 aa_seq[0] = aa;
468 aa_seq[1] = '\0';
469 Translate_reverse(translate, aa_seq, 1,
470 Alignment_match_translate_reverse, &amd);
471 }
472 return codon_match;
473 }
474
AlignmentView_add_MATCH(AlignmentView * av,C4_Transition * transition,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,Submat * dna_submat,Submat * protein_submat,Translate * translate)475 static void AlignmentView_add_MATCH(AlignmentView *av,
476 C4_Transition *transition,
477 gint total_length, Sequence *query, Sequence *target,
478 gint query_pos, gint target_pos,
479 Submat *dna_submat, Submat *protein_submat,
480 Translate *translate){
481 register gchar query_symbol, target_symbol;
482 register gchar *query_string, *target_string;
483 register gchar *inner_query_string = NULL,
484 *inner_target_string = NULL;
485 register gint max_advance;
486 register gint i;
487 register gint curr_query_pos = query_pos,
488 curr_target_pos = target_pos;
489 register Match *match = (Match*)transition->label_data;
490 gchar match_string[4]; /* FIXME: temp: should be max_advance long */
491 for(i = 0; i < total_length; i++){
492 max_advance = MAX(transition->advance_query,
493 transition->advance_target);
494 query_string = Alignment_match_get_string(query,
495 curr_query_pos, transition->advance_query,
496 max_advance, translate);
497 target_string = Alignment_match_get_string(target,
498 curr_target_pos, transition->advance_target,
499 max_advance, translate);
500 query_symbol = Alignment_match_get_symbol(query,
501 curr_query_pos, transition->advance_query,
502 translate);
503 target_symbol = Alignment_match_get_symbol(target,
504 curr_target_pos, transition->advance_target,
505 translate);
506 if(transition->advance_query == 3)
507 inner_query_string = Alphabet_aa2tla(query_symbol);
508 if(transition->advance_target == 3)
509 inner_target_string = Alphabet_aa2tla(target_symbol);
510 if(match){
511 match->display_func(match, query, target,
512 curr_query_pos, curr_target_pos,
513 match_string);
514 } else { /* In the absense of Match */
515 g_assert(transition->advance_query == 1);
516 g_assert(transition->advance_target == 1);
517 match_string[0] = (Sequence_get_symbol(query, query_pos)
518 == Sequence_get_symbol(target, target_pos))
519 ?'|':' ';
520 match_string[1] = '\0';
521 }
522 AlignmentView_add(av, query_string, inner_query_string,
523 match_string,
524 inner_target_string, target_string,
525 curr_query_pos, curr_target_pos);
526 g_free(query_string);
527 g_free(target_string);
528 curr_query_pos += transition->advance_query;
529 curr_target_pos += transition->advance_target;
530 }
531 return;
532 }
533
AlignmentView_add_GAP(AlignmentView * av,gint advance_query,gint advance_target,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,Translate * translate)534 static void AlignmentView_add_GAP(AlignmentView *av,
535 gint advance_query, gint advance_target,
536 gint total_length, Sequence *query, Sequence *target,
537 gint query_pos, gint target_pos, Translate *translate){
538 register gint i, j;
539 register gint curr_query_pos = query_pos,
540 curr_target_pos = target_pos;
541 register gchar *seq_string = g_new(gchar, 4),
542 *match_string = g_new(gchar, 4),
543 *gap_string = g_new(gchar, 4);
544 register Alphabet_Type emitted_alphabet_type;
545 register gchar tr_codon, *tr_codon_name;
546 register gboolean is_translating
547 = ( ( (query->alphabet->type == Alphabet_Type_PROTEIN)
548 && (target->alphabet->type == Alphabet_Type_DNA))
549 ||( (query->alphabet->type == Alphabet_Type_DNA)
550 && (target->alphabet->type == Alphabet_Type_PROTEIN))
551 || ((advance_query|advance_target) == 3));
552 g_assert(!(advance_query && advance_target));
553 match_string[0] = ' ';
554 gap_string[0] = '-';
555 if(advance_query)
556 emitted_alphabet_type = query->alphabet->type;
557 else /* advance_target */
558 emitted_alphabet_type = target->alphabet->type;
559 for(i = 0; i < total_length; i++){
560 for(j = 0; j < (advance_query|advance_target); j++){
561 if(advance_query)
562 seq_string[j] = Sequence_get_symbol(query, curr_query_pos+j);
563 else /* advance_target */
564 seq_string[j] = Sequence_get_symbol(target, curr_target_pos+j);
565 match_string[j] = match_string[0];
566 gap_string[j] = gap_string[0];
567 }
568 seq_string[j] = match_string[j] = gap_string[j] = '\0';
569 tr_codon_name = NULL;
570 if(is_translating){
571 if(emitted_alphabet_type == Alphabet_Type_PROTEIN){
572 strncpy(seq_string, Alphabet_aa2tla(seq_string[0]), 3);
573 match_string[2] = match_string[1] = match_string[0];
574 gap_string[2] = gap_string[1] = gap_string[0];
575 seq_string[3] = match_string[3] = gap_string[3] = '\0';
576 }
577 if((advance_query|advance_target) == 3){
578 gap_string[0] = '<';
579 gap_string[1] = '-';
580 gap_string[2] = '>';
581 gap_string[3] = '\0';
582 tr_codon = Translate_codon(translate, seq_string);
583 tr_codon_name = Alphabet_aa2tla(tr_codon);
584 }
585 }
586 if(advance_query)
587 AlignmentView_add(av, seq_string,
588 tr_codon_name,
589 match_string,
590 is_translating?gap_string:NULL,
591 gap_string,
592 curr_query_pos, curr_target_pos);
593 else
594 AlignmentView_add(av, gap_string,
595 is_translating?gap_string:NULL,
596 match_string,
597 tr_codon_name,
598 seq_string,
599 curr_query_pos, curr_target_pos);
600 curr_query_pos += advance_query;
601 curr_target_pos += advance_target;
602 }
603 g_free(seq_string);
604 g_free(match_string);
605 g_free(gap_string);
606 return;
607 }
608 /* Display gap:[- N] codon[<-> NNN] frameshift [-*N]
609 */
610
AlignmentView_set_consensus_ss_string(AlignmentView * av,gboolean is_5_prime,gchar * splice_site,gchar * consensus_string)611 static void AlignmentView_set_consensus_ss_string(AlignmentView *av,
612 gboolean is_5_prime, gchar *splice_site, gchar *consensus_string){
613 register gchar cons_a, cons_b;
614 if(av->gene_orientation == '+'){
615 if(is_5_prime){ /* FWD: gt..ag */
616 cons_a = 'G';
617 cons_b = 'T';
618 } else {
619 cons_a = 'A';
620 cons_b = 'G';
621 }
622 } else {
623 g_assert(av->gene_orientation == '-');
624 if(is_5_prime){ /* REV: ct..ac */
625 cons_a = 'A';
626 cons_b = 'C';
627 } else {
628 cons_a = 'C';
629 cons_b = 'T';
630 }
631 }
632 if(toupper(splice_site[0]) == cons_a)
633 consensus_string[0] = '+';
634 else
635 consensus_string[0] = '-';
636 if(toupper(splice_site[1]) == cons_b)
637 consensus_string[1] = '+';
638 else
639 consensus_string[1] = '-';
640 return;
641 }
642
AlignmentView_add_SPLICE_SITE(AlignmentView * av,gint advance_query,gint advance_target,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,gboolean is_5_prime,C4_Transition * last_match)643 static void AlignmentView_add_SPLICE_SITE(AlignmentView *av,
644 gint advance_query, gint advance_target,
645 gint total_length, Sequence *query, Sequence *target,
646 gint query_pos, gint target_pos, gboolean is_5_prime,
647 C4_Transition *last_match){
648 register gchar *gap_string = " ";
649 static gchar qy_seq_string[3], tg_seq_string[3],
650 qy_cons_string[3], tg_cons_string[3];
651 qy_cons_string[0] = qy_cons_string[1] = ' ';
652 tg_cons_string[0] = tg_cons_string[1] = ' ';
653 qy_cons_string[2] = tg_cons_string[2] = '\0';
654 qy_seq_string[2] = tg_seq_string[2] = '\0';
655 if(advance_query == 2){
656 qy_seq_string[0] = Sequence_get_symbol(query, query_pos);
657 qy_seq_string[1] = Sequence_get_symbol(query, query_pos+1);
658 AlignmentView_set_consensus_ss_string(av, is_5_prime,
659 qy_seq_string, qy_cons_string);
660 g_strdown(qy_seq_string);
661 }
662 if(advance_target == 2){
663 tg_seq_string[0] = Sequence_get_symbol(target, target_pos);
664 tg_seq_string[1] = Sequence_get_symbol(target, target_pos+1);
665 AlignmentView_set_consensus_ss_string(av, is_5_prime,
666 tg_seq_string, tg_cons_string);
667 g_strdown(tg_seq_string);
668 }
669 if(advance_query == 2){
670 if(advance_target == 2){ /* Joint intron */
671 AlignmentView_add(av, qy_seq_string,
672 qy_cons_string, gap_string, tg_cons_string,
673 tg_seq_string,
674 query_pos, target_pos);
675 } else { /* Query intron */
676 g_assert(advance_target == 0);
677 g_strdown(qy_seq_string);
678 g_assert(last_match);
679 if(last_match->advance_query == 3)
680 AlignmentView_add(av, qy_seq_string, qy_cons_string, gap_string,
681 gap_string, gap_string, query_pos, target_pos);
682 else
683 AlignmentView_add(av, qy_seq_string,
684 NULL, qy_cons_string, NULL,
685 gap_string, query_pos, target_pos);
686 }
687 } else { /* Target intron */
688 g_assert(advance_query == 0);
689 g_assert(advance_target == 2);
690 g_strdown(tg_seq_string);
691 g_assert(last_match);
692 if(last_match->advance_target == 3)
693 AlignmentView_add(av, gap_string, gap_string,
694 gap_string, tg_cons_string, tg_seq_string,
695 query_pos, target_pos);
696 else
697 AlignmentView_add(av, gap_string,
698 NULL, tg_cons_string, NULL,
699 tg_seq_string, query_pos, target_pos);
700 }
701 return;
702 }
703
AlignmentView_add_INTRON(AlignmentView * av,gint advance_query,gint advance_target,Sequence * query,Sequence * target,gint query_pos,gint target_pos,C4_Transition * last_match)704 static void AlignmentView_add_INTRON(AlignmentView *av,
705 gint advance_query, gint advance_target,
706 Sequence *query, Sequence *target,
707 gint query_pos, gint target_pos,
708 C4_Transition *last_match){
709 register gchar *dir_sign = "????", *label = NULL;
710 register gint fill, intron_count = 0;
711 register gchar *name_string, *middle_string, *gap_string,
712 *pad_string, *intron_name;
713 g_assert(last_match);
714 if(av->gene_orientation == '+'){
715 dir_sign = ">>>>";
716 } else if(av->gene_orientation == '-'){
717 dir_sign = "<<<<";
718 }
719 if(advance_query){
720 if(advance_target){
721 intron_count = ++av->joint_intron_count;
722 intron_name = "Joint";
723 label = g_strdup_printf("%d bp // %d bp",
724 advance_query+4, advance_target+4);
725 } else {
726 intron_count = ++av->query_intron_count;
727 intron_name = "Query";
728 label = g_strdup_printf("%d bp", advance_query+4);
729 }
730 } else {
731 intron_count = ++av->target_intron_count;
732 intron_name = "Target";
733 label = g_strdup_printf("%d bp", advance_target+4);
734 }
735 name_string = g_strdup_printf("%s %s Intron %d %s",
736 dir_sign, intron_name, intron_count, dir_sign);
737 g_assert(strlen(name_string) > strlen(label));
738 fill = (strlen(name_string)-strlen(label))+1;
739 middle_string = g_strdup_printf("%*c%s%*c",
740 ((fill|1)>>1), ' ', label, ((fill-1)>>1), ' ');
741 gap_string = g_strnfill(strlen(name_string), '.');
742 pad_string = g_strnfill(strlen(name_string), '^');
743 g_assert(strlen(name_string) == strlen(middle_string));
744 g_assert(strlen(middle_string) == strlen(gap_string));
745 if(advance_query){
746 if(advance_target){ /* joint intron */
747 AlignmentView_add(av, name_string, NULL, middle_string, NULL,
748 name_string, query_pos, target_pos);
749 } else { /* query intron */
750 if(last_match->advance_query == 3)
751 AlignmentView_add(av, gap_string, pad_string, middle_string,
752 pad_string, name_string, query_pos, target_pos);
753 else
754 AlignmentView_add(av, gap_string, NULL, middle_string, NULL,
755 name_string, query_pos, target_pos);
756 }
757 } else { /* target intron */
758 if(last_match->advance_target == 3)
759 AlignmentView_add(av, name_string, pad_string, middle_string,
760 pad_string, gap_string, query_pos, target_pos);
761 else
762 AlignmentView_add(av, name_string, NULL, middle_string, NULL,
763 gap_string, query_pos, target_pos);
764 }
765 g_free(label);
766 g_free(name_string);
767 g_free(gap_string);
768 g_free(pad_string);
769 g_free(middle_string);
770 return;
771 }
772
AlignmentView_add_NER(AlignmentView * av,gint advance_query,gint advance_target,gint query_pos,gint target_pos)773 static void AlignmentView_add_NER(AlignmentView *av,
774 gint advance_query, gint advance_target,
775 gint query_pos, gint target_pos){
776 register gchar
777 *upper_string = g_strdup_printf("%d", advance_query),
778 *middle_string = g_strdup_printf("NER %d", ++av->ner_count),
779 *lower_string = g_strdup_printf("%d", advance_target);
780 register gint upper_len = strlen(upper_string),
781 middle_len = strlen(middle_string),
782 lower_len = strlen(lower_string),
783 max_len;
784 register gchar *upper_padded, *middle_padded, *lower_padded;
785 max_len = upper_len;
786 if(max_len < middle_len)
787 max_len = middle_len;
788 if(max_len < lower_len)
789 max_len = lower_len;
790 upper_padded = g_strdup_printf("--<%*c%s%*c>--",
791 1+(((max_len-upper_len)+1)>>1), ' ',
792 upper_string,
793 1+((max_len-upper_len)>>1), ' ');
794 middle_padded = g_strdup_printf("--<%*c%s%*c>--",
795 1+(((max_len-middle_len)+1)>>1), ' ',
796 middle_string,
797 1+((max_len-middle_len)>>1), ' ');
798 lower_padded = g_strdup_printf("--<%*c%s%*c>--",
799 1+(((max_len-lower_len)+1)>>1), ' ',
800 lower_string,
801 1+((max_len-lower_len)>>1), ' ');
802 g_free(upper_string);
803 g_free(middle_string);
804 g_free(lower_string);
805 AlignmentView_add(av, upper_padded, NULL, middle_padded, NULL,
806 lower_padded, query_pos, target_pos);
807 g_free(upper_padded);
808 g_free(middle_padded);
809 g_free(lower_padded);
810 return;
811 }
812
813 #define AlignmentView_combine_advance(advance_query, advance_target) \
814 (((advance_query) << 8) | (advance_target))
815
AlignmentView_add_SPLIT_CODON(AlignmentView * av,Sequence * query,Sequence * target,gint advance_query,gint advance_target,gint query_pos,gint target_pos,Submat * protein_submat,Translate * translate)816 static void AlignmentView_add_SPLIT_CODON(AlignmentView *av,
817 Sequence *query, Sequence *target,
818 gint advance_query, gint advance_target,
819 gint query_pos, gint target_pos,
820 Submat *protein_submat, Translate *translate){
821 register gchar *query_string = NULL, *target_string = NULL,
822 *match_string = NULL, *codon_match;
823 register gchar qy_aa_symbol = '\0', tg_aa_symbol = '\0',
824 *qy_aa_name = NULL, *tg_aa_name = NULL,
825 *tr_codon_name;
826 register gchar *inner_query_string = NULL,
827 *inner_target_string = NULL;
828 register gint pos_to_start = -1, num_to_print;
829 register gint qp0 = 0, qp1 = 0, qp2 = 0,
830 tp0 = 0, tp1 = 0, tp2 = 0;
831 register gboolean query_is_dna, target_is_dna, before_intron;
832 gchar qy_codon[4], tg_codon[4];
833 register AlignmentSeparation *as
834 = av->split_codon_separation_list->pdata
835 [av->curr_split_codon_count >> 1];
836 g_assert(advance_query || advance_target);
837 query_is_dna = (query->alphabet->type == Alphabet_Type_DNA);
838 target_is_dna = (target->alphabet->type == Alphabet_Type_DNA);
839 before_intron = (av->curr_split_codon_count & 1)?FALSE:TRUE;
840 qy_codon[0] = qy_codon[1] = qy_codon[2] = qy_codon[3] = '\0';
841 tg_codon[0] = tg_codon[1] = tg_codon[2] = tg_codon[3] = '\0';
842 /**/
843 if(query_is_dna && target_is_dna){
844 switch(AlignmentView_combine_advance(
845 advance_query, advance_target)){
846 case AlignmentView_combine_advance(1, 1):
847 if(before_intron){
848 pos_to_start = 0;
849 qp0 = query_pos;
850 qp1 = query_pos + as->query_separation;
851 qp2 = query_pos + as->query_separation + 1;
852 tp0 = target_pos;
853 tp1 = target_pos + as->target_separation;
854 tp2 = target_pos + as->target_separation + 1;
855 } else { /* after_intron */
856 pos_to_start = 2;
857 qp0 = query_pos - as->query_separation;
858 qp1 = query_pos - as->query_separation + 1;
859 qp2 = query_pos;
860 tp0 = target_pos - as->target_separation;
861 tp1 = target_pos - as->target_separation + 1;
862 tp2 = target_pos;
863 }
864 break;
865 case AlignmentView_combine_advance(2, 2):
866 if(before_intron){
867 pos_to_start = 0;
868 qp0 = query_pos;
869 qp1 = query_pos + 1;
870 qp2 = query_pos + as->query_separation;
871 tp0 = target_pos;
872 tp1 = target_pos + 1;
873 tp2 = target_pos + as->target_separation;
874 } else { /* after_intron */
875 pos_to_start = 1;
876 qp0 = query_pos - as->query_separation;
877 qp1 = query_pos;
878 qp2 = query_pos + 1;
879 tp0 = target_pos - as->target_separation;
880 tp1 = target_pos;
881 tp2 = target_pos + 1;
882 }
883 break;
884 default:
885 g_error("Unexpected d2d split codon [%d,%d]",
886 advance_query, advance_target);
887 break;
888 }
889 qy_codon[0] = Sequence_get_symbol(query, qp0);
890 qy_codon[1] = Sequence_get_symbol(query, qp1);
891 qy_codon[2] = Sequence_get_symbol(query, qp2);
892 tg_codon[0] = Sequence_get_symbol(target, tp0);
893 tg_codon[1] = Sequence_get_symbol(target, tp1);
894 tg_codon[2] = Sequence_get_symbol(target, tp2);
895 } else {
896 if(query_is_dna){
897 g_assert(target->alphabet->type == Alphabet_Type_PROTEIN);
898 tg_aa_symbol = Sequence_get_symbol(target, target_pos);
899 switch(AlignmentView_combine_advance(
900 advance_query, advance_target)){
901 case AlignmentView_combine_advance(1, 0):
902 pos_to_start = 0;
903 qp0 = query_pos;
904 qp1 = query_pos + as->query_separation;
905 qp2 = query_pos + as->query_separation + 1;
906 break;
907 case AlignmentView_combine_advance(2, 0):
908 pos_to_start = 0;
909 qp0 = query_pos;
910 qp1 = query_pos + 1;
911 qp2 = query_pos + as->query_separation;
912 break;
913 case AlignmentView_combine_advance(2, 1):
914 pos_to_start = 1;
915 qp0 = query_pos - as->query_separation;
916 qp1 = query_pos;
917 qp2 = query_pos + 1;
918 break;
919 case AlignmentView_combine_advance(1, 1):
920 pos_to_start = 2;
921 qp0 = query_pos - as->query_separation;
922 qp1 = query_pos - as->query_separation + 1;
923 qp2 = query_pos;
924 break;
925 default:
926 g_error("Unexpected d2p split codon [%d,%d]",
927 advance_query, advance_target);
928 break;
929 }
930 qy_codon[0] = Sequence_get_symbol(query, qp0);
931 qy_codon[1] = Sequence_get_symbol(query, qp1);
932 qy_codon[2] = Sequence_get_symbol(query, qp2);
933 } else {
934 g_assert(query->alphabet->type == Alphabet_Type_PROTEIN);
935 g_assert(target->alphabet->type == Alphabet_Type_DNA);
936 qy_aa_symbol = Sequence_get_symbol(query, query_pos);
937 switch(AlignmentView_combine_advance(
938 advance_query, advance_target)){
939 case AlignmentView_combine_advance(0, 1):
940 pos_to_start = 0;
941 tp0 = target_pos;
942 tp1 = target_pos + as->target_separation;
943 tp2 = target_pos + as->target_separation + 1;
944 break;
945 case AlignmentView_combine_advance(0, 2):
946 pos_to_start = 0;
947 tp0 = target_pos;
948 tp1 = target_pos + 1;
949 tp2 = target_pos + as->target_separation;
950 break;
951 case AlignmentView_combine_advance(1, 2):
952 pos_to_start = 1;
953 tp0 = target_pos - as->target_separation;
954 tp1 = target_pos;
955 tp2 = target_pos + 1;
956 break;
957 case AlignmentView_combine_advance(1, 1):
958 pos_to_start = 2;
959 tp0 = target_pos - as->target_separation;
960 tp1 = target_pos - as->target_separation + 1;
961 tp2 = target_pos;
962 break;
963 default:
964 g_error("Unexpected p2d split codon [%d,%d]",
965 advance_query, advance_target);
966 break;
967 }
968 tg_codon[0] = Sequence_get_symbol(target, tp0);
969 tg_codon[1] = Sequence_get_symbol(target, tp1);
970 tg_codon[2] = Sequence_get_symbol(target, tp2);
971 }
972 }
973 g_assert(pos_to_start != -1);
974 av->curr_split_codon_count++;
975 /**/
976 if(!query_is_dna)
977 qy_aa_name = Alphabet_aa2tla(qy_aa_symbol);
978 if(!target_is_dna)
979 tg_aa_name = Alphabet_aa2tla(tg_aa_symbol);
980 num_to_print = MAX(advance_query, advance_target);
981 query_string = g_strdup_printf("{%.*s}",
982 num_to_print,
983 (query_is_dna?qy_codon:qy_aa_name)+pos_to_start);
984 target_string = g_strdup_printf("{%.*s}",
985 num_to_print,
986 (target_is_dna?tg_codon:tg_aa_name)+pos_to_start);
987 g_strup(qy_codon);
988 g_strup(tg_codon);
989 if(query_is_dna){
990 g_assert(!qy_aa_symbol);
991 qy_aa_symbol = Translate_codon(translate, qy_codon);
992 tr_codon_name = Alphabet_aa2tla(qy_aa_symbol);
993 inner_query_string = g_strdup_printf("{%.*s}",
994 num_to_print, tr_codon_name+pos_to_start);
995 }
996 if(target_is_dna){
997 g_assert(!tg_aa_symbol);
998 tg_aa_symbol = Translate_codon(translate, tg_codon);
999 tr_codon_name = Alphabet_aa2tla(tg_aa_symbol);
1000 inner_target_string = g_strdup_printf("{%.*s}",
1001 num_to_print, tr_codon_name+pos_to_start);
1002 }
1003 /**/
1004 if(query_is_dna){
1005 if(target_is_dna){ /* d,d */
1006 codon_match = g_strnfill(3, Alignment_get_equiv_symbol(
1007 qy_aa_symbol, tg_aa_symbol,
1008 protein_submat));
1009 } else { /* d,p */
1010 codon_match = Alignment_get_codon_match_string(
1011 qy_codon, tg_aa_symbol,
1012 protein_submat, translate);
1013 }
1014 } else { /* p,d */
1015 codon_match = Alignment_get_codon_match_string(
1016 tg_codon, qy_aa_symbol,
1017 protein_submat, translate);
1018 }
1019 match_string = g_strdup_printf("{%.*s}",
1020 num_to_print, codon_match+pos_to_start);
1021 g_free(codon_match);
1022 /**/
1023 g_assert(query_string);
1024 g_assert(match_string);
1025 g_assert(target_string);
1026 AlignmentView_add(av, query_string, inner_query_string,
1027 match_string, inner_target_string,
1028 target_string, query_pos, target_pos);
1029 g_free(query_string);
1030 g_free(match_string);
1031 g_free(target_string);
1032 if(inner_query_string)
1033 g_free(inner_query_string);
1034 if(inner_target_string)
1035 g_free(inner_target_string);
1036 return;
1037 }
1038
AlignmentView_add_FRAMESHIFT(AlignmentView * av,gint advance_query,gint advance_target,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,Translate * translate)1039 static void AlignmentView_add_FRAMESHIFT(AlignmentView *av,
1040 gint advance_query, gint advance_target,
1041 gint total_length, Sequence *query, Sequence *target,
1042 gint query_pos, gint target_pos, Translate *translate){
1043 register gint i, j;
1044 register gint curr_query_pos = query_pos,
1045 curr_target_pos = target_pos;
1046 static gchar seq_string[4], match_string[4], gap_string[4];
1047 register Alphabet_Type emitted_alphabet_type;
1048 g_assert(!(advance_query && advance_target));
1049 match_string[0] = '#'; /* Frameshift */
1050 gap_string[0] = '-';
1051 if(advance_query)
1052 emitted_alphabet_type = query->alphabet->type;
1053 else /* advance_target */
1054 emitted_alphabet_type = target->alphabet->type;
1055 for(i = 0; i < total_length; i++){
1056 for(j = 0; j < (advance_query|advance_target); j++){
1057 if(advance_query)
1058 seq_string[j] = Sequence_get_symbol(query, curr_query_pos+j);
1059 else /* advance_target */
1060 seq_string[j] = Sequence_get_symbol(target, curr_target_pos+j);
1061 match_string[j] = match_string[0];
1062 gap_string[j] = gap_string[0];
1063 }
1064 seq_string[j] = match_string[j] = gap_string[j] = '\0';
1065 if(emitted_alphabet_type == Alphabet_Type_PROTEIN){
1066 strncpy(seq_string, Alphabet_aa2tla(seq_string[0]), 3);
1067 match_string[2] = match_string[1] = match_string[0];
1068 gap_string[2] = gap_string[1] = gap_string[0];
1069 seq_string[3] = match_string[3] = gap_string[3] = '\0';
1070 }
1071 if(advance_query)
1072 AlignmentView_add(av, seq_string,
1073 match_string,
1074 match_string,
1075 gap_string,
1076 gap_string,
1077 curr_query_pos, curr_target_pos);
1078 else
1079 AlignmentView_add(av, gap_string,
1080 gap_string,
1081 match_string,
1082 match_string,
1083 seq_string,
1084 curr_query_pos, curr_target_pos);
1085 curr_query_pos += advance_query;
1086 curr_target_pos += advance_target;
1087 }
1088 return;
1089 }
1090
AlignmentView_add_label_operation(AlignmentView * av,C4_Transition * transition,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,Submat * dna_submat,Submat * protein_submat,Translate * translate,gboolean next_has_same_label,C4_Transition ** last_match)1091 static void AlignmentView_add_label_operation(AlignmentView *av,
1092 C4_Transition *transition,
1093 gint total_length, Sequence *query, Sequence *target,
1094 gint query_pos, gint target_pos,
1095 Submat *dna_submat, Submat *protein_submat,
1096 Translate *translate,
1097 gboolean next_has_same_label, C4_Transition **last_match){
1098 switch(transition->label){
1099 case C4_Label_NONE:
1100 g_assert(!transition->advance_query);
1101 g_assert(!transition->advance_target);
1102 break;
1103 case C4_Label_MATCH:
1104 (*last_match) = transition;
1105 AlignmentView_add_MATCH(av, transition,
1106 total_length, query, target,
1107 query_pos, target_pos,
1108 dna_submat, protein_submat, translate);
1109 break;
1110 case C4_Label_GAP:
1111 AlignmentView_add_GAP(av,
1112 transition->advance_query,
1113 transition->advance_target,
1114 total_length, query, target,
1115 query_pos, target_pos, translate);
1116 break;
1117 case C4_Label_5SS: /*fallthrough*/
1118 AlignmentView_add_SPLICE_SITE(av,
1119 transition->advance_query,
1120 transition->advance_target,
1121 total_length, query, target,
1122 query_pos, target_pos, TRUE, (*last_match));
1123 break;
1124 case C4_Label_3SS:
1125 AlignmentView_add_SPLICE_SITE(av,
1126 transition->advance_query,
1127 transition->advance_target,
1128 total_length, query, target,
1129 query_pos, target_pos, FALSE, (*last_match));
1130 break;
1131 case C4_Label_INTRON:
1132 av->intron_advance_query += (transition->advance_query
1133 * total_length);
1134 av->intron_advance_target += (transition->advance_target
1135 * total_length);
1136 if(!next_has_same_label){
1137 AlignmentView_add_INTRON(av,
1138 av->intron_advance_query,
1139 av->intron_advance_target,
1140 query, target,
1141 query_pos, target_pos, (*last_match));
1142 av->intron_advance_query = 0;
1143 av->intron_advance_target = 0;
1144 }
1145 break;
1146 case C4_Label_NER:
1147 av->ner_advance_query += (transition->advance_query
1148 * total_length);
1149 av->ner_advance_target += (transition->advance_target
1150 * total_length);
1151 if(!next_has_same_label){
1152 AlignmentView_add_NER(av, av->ner_advance_query,
1153 av->ner_advance_target,
1154 query_pos, target_pos);
1155 av->ner_advance_query = 0;
1156 av->ner_advance_target = 0;
1157 }
1158 break;
1159 case C4_Label_SPLIT_CODON:
1160 g_assert(total_length == 1);
1161 AlignmentView_add_SPLIT_CODON(av, query, target,
1162 transition->advance_query,
1163 transition->advance_target,
1164 query_pos, target_pos,
1165 protein_submat, translate);
1166 break;
1167 case C4_Label_FRAMESHIFT:
1168 AlignmentView_add_FRAMESHIFT(av,
1169 transition->advance_query,
1170 transition->advance_target,
1171 total_length, query, target,
1172 query_pos, target_pos, translate);
1173 break;
1174 default:
1175 g_error("AlignmentView cannot use label [%s]",
1176 C4_Label_get_name(transition->label));
1177 break;
1178 }
1179 return;
1180 }
1181
AlignmentView_prepare(AlignmentView * av,Alignment * alignment,Sequence * query,Sequence * target,Submat * dna_submat,Submat * protein_submat,Translate * translate)1182 static void AlignmentView_prepare(AlignmentView *av,
1183 Alignment *alignment,
1184 Sequence *query, Sequence *target,
1185 Submat *dna_submat, Submat *protein_submat,
1186 Translate *translate){
1187 register gint i;
1188 register gint query_pos = alignment->region->query_start,
1189 target_pos = alignment->region->target_start;
1190 register AlignmentPosition *apos;
1191 register AlignmentOperation *ao, *prev_ao;
1192 register gint total_length;
1193 C4_Transition *last_match = NULL;
1194 apos = g_new(AlignmentPosition, 1);
1195 apos->query_pos = alignment->region->query_start-1;
1196 apos->target_pos = alignment->region->target_start-1;
1197 g_ptr_array_add(av->row_marker, apos);
1198 prev_ao = alignment->operation_list->pdata[0];
1199 total_length = prev_ao->length;
1200 for(i = 1; i < alignment->operation_list->len; i++){
1201 ao = alignment->operation_list->pdata[i];
1202 if(prev_ao->transition == ao->transition){
1203 /* group */
1204 total_length += ao->length;
1205 } else {
1206 /* report */
1207 AlignmentView_add_label_operation(av, prev_ao->transition,
1208 total_length, query, target, query_pos, target_pos,
1209 dna_submat, protein_submat, translate,
1210 (prev_ao->transition->label == ao->transition->label),
1211 &last_match);
1212 query_pos += (prev_ao->transition->advance_query
1213 * total_length);
1214 target_pos += (prev_ao->transition->advance_target
1215 * total_length);
1216 /* start new */
1217 prev_ao = ao;
1218 total_length = prev_ao->length;
1219 }
1220 }
1221 AlignmentView_add_label_operation(av, prev_ao->transition,
1222 total_length, query, target, query_pos, target_pos,
1223 dna_submat, protein_submat, translate, FALSE,
1224 &last_match);
1225 apos = g_new(AlignmentPosition, 1);
1226 apos->query_pos = Region_query_end(alignment->region)-1;
1227 apos->target_pos = Region_target_end(alignment->region)-1;
1228 g_ptr_array_add(av->row_marker, apos);
1229 return;
1230 }
1231
AlignmentView_string_is_empty(GString * str,gint pos,gint width)1232 static gboolean AlignmentView_string_is_empty(GString *str, gint pos,
1233 gint width){
1234 register gint i;
1235 for(i = 0; i < width; i++)
1236 if(str->str[pos+i] != ' ')
1237 return FALSE;
1238 return TRUE;
1239 }
1240
AlignmentView_prepare_seq(GString * outer,GString * inner,gint pos,gint width)1241 static void AlignmentView_prepare_seq(GString *outer, GString *inner,
1242 gint pos, gint width){
1243 register gint i;
1244 register gchar t;
1245 for(i = 0; i < width; i++){
1246 if(inner->str[pos+i] == ' '){ /* Swap empty */
1247 t = inner->str[pos+i];
1248 inner->str[pos+i] = outer->str[pos+i];
1249 outer->str[pos+i] = t;
1250 continue;
1251 }
1252 if(inner->str[pos+i] == '^') /* Replace padding */
1253 inner->str[pos+i] = ' ';
1254 }
1255 return;
1256 }
1257
AlignmentView_replace_padding(GString * str,gint pos,gint width)1258 static void AlignmentView_replace_padding(GString *str, gint pos, gint width){
1259 register gint i;
1260 for(i = 0; i < width; i++){
1261 if(str->str[pos+i] == '^')
1262 str->str[pos+i] = ' ';
1263 }
1264 return;
1265 }
1266
AlignmentView_display_row(AlignmentView * av,gint row,gint pos,gint width,gint maxposlen,Sequence * query,Sequence * target,FILE * fp)1267 static void AlignmentView_display_row(AlignmentView *av,
1268 gint row, gint pos, gint width, gint maxposlen,
1269 Sequence *query, Sequence *target, FILE *fp){
1270 register AlignmentPosition *apos1 = av->row_marker->pdata[row],
1271 *apos2 = av->row_marker->pdata[row+1];
1272 register gint p1q, p1t, p2q, p2t;
1273 register Alignment_ArgumentSet *aas
1274 = Alignment_ArgumentSet_create(NULL);
1275 register gboolean show_inner_query = FALSE,
1276 show_inner_target = FALSE;
1277 p1q = apos1->query_pos+1;
1278 p2q = apos2->query_pos+1;
1279 p1t = apos1->target_pos+1;
1280 p2t = apos2->target_pos+1;
1281 if(aas->forward_strand_coords){
1282 if(query->strand == Sequence_Strand_REVCOMP){
1283 p1q = query->len - p1q - 1;
1284 p2q = query->len - p2q + 1;
1285 }
1286 if(target->strand == Sequence_Strand_REVCOMP){
1287 p1t = target->len - p1t - 1;
1288 p2t = target->len - p2t + 1;
1289 }
1290 }
1291 if(av->inner_query
1292 && (!AlignmentView_string_is_empty(av->inner_query, pos, width))){
1293 show_inner_query = TRUE;
1294 AlignmentView_prepare_seq(av->outer_query,
1295 av->inner_query, pos, width);
1296 }
1297 if(av->inner_target
1298 && (!AlignmentView_string_is_empty(av->inner_target, pos, width))){
1299 show_inner_target = TRUE;
1300 AlignmentView_prepare_seq(av->outer_target,
1301 av->inner_target, pos, width);
1302 }
1303 AlignmentView_replace_padding(av->outer_query, pos, width);
1304 AlignmentView_replace_padding(av->outer_target, pos, width);
1305 fprintf(fp, " %*d : %.*s : %*d\n", maxposlen, p1q+1,
1306 width, av->outer_query->str+pos,
1307 maxposlen, p2q);
1308 if(show_inner_query)
1309 fprintf(fp, " %*s %.*s\n", maxposlen, " ", width,
1310 av->inner_query->str+pos);
1311 fprintf(fp, " %*s %.*s\n", maxposlen, " ", width,
1312 av->middle->str+pos);
1313 if(show_inner_target)
1314 fprintf(fp, " %*s %.*s\n", maxposlen, " ", width,
1315 av->inner_target->str+pos);
1316 fprintf(fp, " %*d : %.*s : %*d\n", maxposlen, p1t+1,
1317 width, av->outer_target->str+pos,
1318 maxposlen, p2t);
1319 return;
1320 }
1321
AlignmentView_display(AlignmentView * av,Sequence * query,Sequence * target,FILE * fp)1322 static void AlignmentView_display(AlignmentView *av,
1323 Sequence *query, Sequence *target,
1324 FILE *fp){
1325 register gint pos = 0, pause, row = 0;
1326 pause = av->outer_query->len-av->width;
1327 while(pos < pause){
1328 AlignmentView_display_row(av, row, pos, av->width,
1329 av->max_pos_len, query, target, fp);
1330 pos += av->width;
1331 row++;
1332 fprintf(fp, "\n");
1333 }
1334 AlignmentView_display_row(av, row, pos, av->outer_query->len-pos,
1335 av->max_pos_len, query, target, fp);
1336 fprintf(fp, "\n");
1337 return;
1338 }
1339
1340 /**/
1341
Alignment_display(Alignment * alignment,Sequence * query,Sequence * target,Submat * dna_submat,Submat * protein_submat,Translate * translate,FILE * fp)1342 void Alignment_display(Alignment *alignment,
1343 Sequence *query, Sequence *target,
1344 Submat *dna_submat, Submat *protein_submat,
1345 Translate *translate, FILE *fp){
1346 register AlignmentView *av = AlignmentView_create(alignment,
1347 query, target);
1348 g_assert(alignment);
1349 g_assert(!alignment->model->is_open);
1350 /* Display header */
1351 fprintf(fp, "\n"
1352 "C4 Alignment:\n"
1353 "------------\n"
1354 " Query: %s%s%s\n"
1355 " Target: %s%s%s\n"
1356 " Model: %s\n"
1357 " Raw score: %d\n"
1358 " Query range: %d -> %d\n"
1359 " Target range: %d -> %d\n\n",
1360 query->id, query->def?" ":"", query->def?query->def:"",
1361 target->id, target->def?" ":"", target->def?target->def:"",
1362 alignment->model->name,
1363 alignment->score,
1364 Alignment_get_coordinate(alignment, query, target,
1365 TRUE, TRUE),
1366 Alignment_get_coordinate(alignment, query, target,
1367 TRUE, FALSE),
1368 Alignment_get_coordinate(alignment, query, target,
1369 FALSE, TRUE),
1370 Alignment_get_coordinate(alignment, query, target,
1371 FALSE, FALSE));
1372 AlignmentView_prepare(av, alignment, query, target,
1373 dna_submat, protein_submat, translate);
1374 AlignmentView_display(av, query, target, fp);
1375 AlignmentView_destroy(av);
1376 return;
1377 }
1378
1379 /**/
1380
Alignment_get_equivalenced_matching(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_id,gpointer user_data)1381 static gint Alignment_get_equivalenced_matching(Alignment *alignment,
1382 Sequence *query, Sequence *target,
1383 Translate *translate, gboolean report_id,
1384 gpointer user_data){
1385 register gint i, j, match = 0;
1386 register AlignmentOperation *ao;
1387 register gint query_pos = alignment->region->query_start,
1388 target_pos = alignment->region->target_start;
1389 register gchar qy_symbol, tg_symbol;
1390 for(i = 0; i < alignment->operation_list->len; i++){
1391 ao = alignment->operation_list->pdata[i];
1392 if(ao->transition->label == C4_Label_MATCH){
1393 for(j = 0; j < ao->length; j++){
1394 qy_symbol = Alignment_match_get_symbol(query, query_pos,
1395 ao->transition->advance_query,
1396 translate);
1397 tg_symbol = Alignment_match_get_symbol(target,
1398 target_pos,
1399 ao->transition->advance_target,
1400 translate);
1401 if(report_id){
1402 if(toupper(qy_symbol) == toupper(tg_symbol))
1403 match++;
1404 } else { /* report similarity */
1405 if(C4_Calc_score(ao->transition->calc, query_pos,
1406 target_pos, user_data) > 0)
1407 match++;
1408 }
1409 query_pos += ao->transition->advance_query;
1410 target_pos += ao->transition->advance_target;
1411 }
1412 } else {
1413 query_pos += (ao->transition->advance_query
1414 * ao->length);
1415 target_pos += (ao->transition->advance_target
1416 * ao->length);
1417 }
1418 }
1419 return match;
1420 }
1421 /* Reports the %id over the equivalenced regions of the alignment
1422 */
1423
Alignment_get_equivalenced_matching_region(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_id,gpointer user_data,gint exon_query_start,gint exon_query_end)1424 static gint Alignment_get_equivalenced_matching_region(Alignment *alignment,
1425 Sequence *query, Sequence *target,
1426 Translate *translate, gboolean report_id,
1427 gpointer user_data, gint exon_query_start, gint exon_query_end){
1428 register gint i, j, match = 0;
1429 register AlignmentOperation *ao;
1430 register gint query_pos = alignment->region->query_start,
1431 target_pos = alignment->region->target_start;
1432 register gchar qy_symbol, tg_symbol;
1433 for(i = 0; i < alignment->operation_list->len; i++){
1434 ao = alignment->operation_list->pdata[i];
1435 if(ao->transition->label == C4_Label_MATCH){
1436 for(j = 0; j < ao->length; j++){
1437 if(query_pos > exon_query_end)
1438 return match;
1439 if(query_pos >= exon_query_start){
1440 qy_symbol = Alignment_match_get_symbol(query, query_pos,
1441 ao->transition->advance_query,
1442 translate);
1443 tg_symbol = Alignment_match_get_symbol(target,
1444 target_pos,
1445 ao->transition->advance_target,
1446 translate);
1447 if(report_id){
1448 if(toupper(qy_symbol) == toupper(tg_symbol))
1449 match++;
1450 } else { /* report similarity */
1451 if(C4_Calc_score(ao->transition->calc, query_pos,
1452 target_pos, user_data) > 0)
1453 match++;
1454 }
1455 }
1456 query_pos += ao->transition->advance_query;
1457 target_pos += ao->transition->advance_target;
1458 }
1459 } else {
1460 query_pos += (ao->transition->advance_query
1461 * ao->length);
1462 target_pos += (ao->transition->advance_target
1463 * ao->length);
1464 }
1465 }
1466 return match;
1467 }
1468 /* Reports the %id over the equivalenced regions of the alignment
1469 */
1470
Alignment_get_equivalenced_total(Alignment * alignment)1471 static gint Alignment_get_equivalenced_total(Alignment *alignment){
1472 register gint i, total = 0;
1473 register AlignmentOperation *ao;
1474 for(i = 0; i < alignment->operation_list->len; i++){
1475 ao = alignment->operation_list->pdata[i];
1476 if(ao->transition->label == C4_Label_MATCH)
1477 total += ao->length;
1478 }
1479 return total;
1480 }
1481
Alignment_get_equivalenced_total_region(Alignment * alignment,gint exon_query_start,gint exon_query_end)1482 static gint Alignment_get_equivalenced_total_region(Alignment *alignment,
1483 gint exon_query_start, gint exon_query_end){
1484 register gint i, j, total = 0;
1485 register gint query_pos = alignment->region->query_start,
1486 target_pos = alignment->region->target_start;
1487 register AlignmentOperation *ao;
1488 for(i = 0; i < alignment->operation_list->len; i++){
1489 ao = alignment->operation_list->pdata[i];
1490 if(ao->transition->label == C4_Label_MATCH){
1491 for(j = 0; j < ao->length; j++){
1492 if(query_pos > exon_query_end)
1493 return total;
1494 if(query_pos >= exon_query_start)
1495 total++;
1496 query_pos += ao->transition->advance_query;
1497 target_pos += ao->transition->advance_target;
1498 }
1499 } else {
1500 query_pos += (ao->transition->advance_query
1501 * ao->length);
1502 target_pos += (ao->transition->advance_target
1503 * ao->length);
1504 }
1505 }
1506 return total;
1507 }
1508 /* FIXME: optimisation: should use gene object to avoid full alignment pass
1509 */
1510
Alignment_get_percent_score_region(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_id,gpointer user_data,gint exon_query_start,gint exon_query_end)1511 static gfloat Alignment_get_percent_score_region(Alignment *alignment,
1512 Sequence *query, Sequence *target,
1513 Translate *translate, gboolean report_id, gpointer user_data,
1514 gint exon_query_start, gint exon_query_end){
1515 return (((gfloat)Alignment_get_equivalenced_matching_region(alignment,
1516 query, target, translate, report_id, user_data,
1517 exon_query_start, exon_query_end))
1518 / ((gfloat)Alignment_get_equivalenced_total_region(alignment,
1519 exon_query_start, exon_query_end)))*100;
1520 }
1521 /* FIXME: should also count split codons */
1522
Alignment_get_percent_score(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_id,gpointer user_data)1523 static gfloat Alignment_get_percent_score(Alignment *alignment,
1524 Sequence *query, Sequence *target,
1525 Translate *translate, gboolean report_id,
1526 gpointer user_data){
1527 return (((gfloat)Alignment_get_equivalenced_matching(alignment,
1528 query, target, translate, report_id, user_data))
1529 / ((gfloat)Alignment_get_equivalenced_total(alignment)))*100;
1530 }
1531 /* FIXME: should also count split codons */
1532
Alignment_get_match_score(Alignment * alignment,Sequence * query,Sequence * target,gpointer user_data)1533 static gint Alignment_get_match_score(Alignment *alignment,
1534 Sequence *query, Sequence *target,
1535 gpointer user_data){
1536 register gint i, j;
1537 register AlignmentOperation *ao;
1538 register C4_Score score = 0;
1539 register gint query_pos = alignment->region->query_start,
1540 target_pos = alignment->region->target_start;
1541 for(i = 0; i < alignment->operation_list->len; i++){
1542 ao = alignment->operation_list->pdata[i];
1543 for(j = 0; j < ao->length; j++){
1544 if(ao->transition->label == C4_Label_MATCH){
1545 score += C4_Calc_score(ao->transition->calc, query_pos,
1546 target_pos, user_data);
1547 }
1548 query_pos += ao->transition->advance_query;
1549 target_pos += ao->transition->advance_target;
1550 }
1551 }
1552 g_message("MATCH SCORE [%d]", score);
1553 return score;
1554 }
1555
Alignment_get_self_match_score(Alignment * alignment,Sequence * query,Sequence * target,gpointer self_data)1556 static gint Alignment_get_self_match_score(Alignment *alignment,
1557 Sequence *query, Sequence *target,
1558 gpointer self_data){
1559 register gint i, j;
1560 register AlignmentOperation *ao;
1561 register C4_Score score = 0;
1562 register gint query_pos = alignment->region->query_start;
1563 for(i = 0; i < alignment->operation_list->len; i++){
1564 ao = alignment->operation_list->pdata[i];
1565 for(j = 0; j < ao->length; j++){
1566 if(ao->transition->label == C4_Label_MATCH){
1567 /* Use self_data to find self-comparison scores */
1568 score += C4_Calc_score(ao->transition->calc, query_pos,
1569 query_pos, self_data);
1570 }
1571 query_pos += ao->transition->advance_query;
1572 }
1573 }
1574 g_message("SELF MATCH SCORE [%d]", score);
1575 return score;
1576 }
1577
Alignment_get_percent_self(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gpointer user_data,gpointer self_data)1578 static gfloat Alignment_get_percent_self(Alignment *alignment,
1579 Sequence *query, Sequence *target,
1580 Translate *translate, gpointer user_data, gpointer self_data){
1581 return (((gfloat)Alignment_get_match_score(alignment,
1582 query, target, user_data))
1583 / ((gfloat)Alignment_get_self_match_score(alignment, query, target,
1584 self_data)))*100;
1585 }
1586 /* Returns score as a percentage of maximum possible
1587 * over the equivalenced transitions
1588 */
1589
1590 /**/
1591
Alignment_print_sugar_block(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)1592 static void Alignment_print_sugar_block(Alignment *alignment,
1593 Sequence *query, Sequence *target, FILE *fp){
1594 fprintf(fp, "%s %d %d %c %s %d %d %c %d",
1595 query->id,
1596 Alignment_get_coordinate(alignment, query, target,
1597 TRUE, TRUE),
1598 Alignment_get_coordinate(alignment, query, target,
1599 TRUE, FALSE),
1600 Sequence_get_strand_as_char(query),
1601 target->id,
1602 Alignment_get_coordinate(alignment, query, target,
1603 FALSE, TRUE),
1604 Alignment_get_coordinate(alignment, query, target,
1605 FALSE, FALSE),
1606 Sequence_get_strand_as_char(target),
1607 alignment->score);
1608 return;
1609 }
1610
Alignment_get_cigar_type(AlignmentOperation * ao,gint * move)1611 static gchar Alignment_get_cigar_type(AlignmentOperation *ao,
1612 gint *move){
1613 if(!ao->transition->advance_query){
1614 (*move) = ao->transition->advance_target * ao->length;
1615 return 'D';
1616 }
1617 if(!ao->transition->advance_target){
1618 (*move) = ao->transition->advance_query * ao->length;
1619 return 'I';
1620 }
1621 (*move) = MAX(ao->transition->advance_query,
1622 ao->transition->advance_target) * ao->length;
1623 return 'M';
1624 }
1625
Alignment_print_cigar_block(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)1626 static void Alignment_print_cigar_block(Alignment *alignment,
1627 Sequence *query, Sequence *target, FILE *fp){
1628 register gint i = 0;
1629 register gchar *gap = "";
1630 register AlignmentOperation *ao;
1631 register gchar type, next_type;
1632 gint move, next_move;
1633 ao = alignment->operation_list->pdata[i];
1634 type = Alignment_get_cigar_type(ao, &move);
1635 for(i = 1; i < alignment->operation_list->len; i++){
1636 ao = alignment->operation_list->pdata[i];
1637 next_type = Alignment_get_cigar_type(ao, &next_move);
1638 if(type == next_type){
1639 move += next_move;
1640 } else {
1641 if(move)
1642 fprintf(fp, "%s%c %d", gap, type, move);
1643 move = next_move;
1644 type = next_type;
1645 gap = " ";
1646 }
1647 }
1648 if(move)
1649 fprintf(fp, "%s%c %d", gap, type, move);
1650 return;
1651 }
1652
Alignment_print_vulgar_block(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)1653 static void Alignment_print_vulgar_block(Alignment *alignment,
1654 Sequence *query, Sequence *target, FILE *fp){
1655 register gint i;
1656 register gchar *gap = "";
1657 register C4_Label curr_label;
1658 register gint curr_advance_query, curr_advance_target;
1659 register AlignmentOperation *ao;
1660 register gboolean curr_is_codon = FALSE;
1661 ao = alignment->operation_list->pdata[0];
1662 curr_label = ao->transition->label;
1663 curr_advance_query = (ao->transition->advance_query * ao->length);
1664 curr_advance_target = (ao->transition->advance_target * ao->length);
1665 for(i = 1; i < alignment->operation_list->len; i++){
1666 ao = alignment->operation_list->pdata[i];
1667 if((ao->transition->label == curr_label)
1668 && (curr_advance_query || (!ao->transition->advance_query))
1669 && (curr_advance_target || (!ao->transition->advance_target))
1670 && (curr_is_codon == ((ao->transition->advance_query == 3)
1671 && (ao->transition->advance_target == 3)))){
1672 curr_advance_query += (ao->transition->advance_query
1673 * ao->length);
1674 curr_advance_target += (ao->transition->advance_target
1675 * ao->length);
1676 } else {
1677 switch(curr_label){
1678 case C4_Label_NONE:
1679 break;
1680 case C4_Label_MATCH:
1681 g_assert(curr_advance_query && curr_advance_target);
1682 fprintf(fp, "%s%c %d %d", gap, curr_is_codon?'C':'M',
1683 curr_advance_query,
1684 curr_advance_target);
1685 gap = " ";
1686 break;
1687 case C4_Label_GAP:
1688 g_assert(curr_advance_query || curr_advance_target);
1689 g_assert(! (curr_advance_query
1690 && curr_advance_target));
1691 fprintf(fp, "%sG %d %d", gap, curr_advance_query,
1692 curr_advance_target);
1693 gap = " ";
1694 break;
1695 case C4_Label_NER:
1696 fprintf(fp, "%sN %d %d", gap, curr_advance_query,
1697 curr_advance_target);
1698 gap = " ";
1699 break;
1700 case C4_Label_5SS:
1701 fprintf(fp, "%s5 %d %d", gap, curr_advance_query,
1702 curr_advance_target);
1703 gap = " ";
1704 break;
1705 case C4_Label_3SS:
1706 fprintf(fp, "%s3 %d %d", gap, curr_advance_query,
1707 curr_advance_target);
1708 gap = " ";
1709 break;
1710 case C4_Label_INTRON:
1711 fprintf(fp, "%sI %d %d", gap, curr_advance_query,
1712 curr_advance_target);
1713 gap = " ";
1714 break;
1715 case C4_Label_SPLIT_CODON:
1716 fprintf(fp, "%sS %d %d", gap, curr_advance_query,
1717 curr_advance_target);
1718 gap = " ";
1719 break;
1720 case C4_Label_FRAMESHIFT:
1721 fprintf(fp, "%sF %d %d", gap, curr_advance_query,
1722 curr_advance_target);
1723 gap = " ";
1724 break;
1725 default:
1726 g_error("Unknown C4_Label [%d]", curr_label);
1727 break;
1728 }
1729 curr_label = ao->transition->label;
1730 curr_is_codon = ((ao->transition->advance_query == 3)
1731 && (ao->transition->advance_target == 3));
1732 curr_advance_query = (ao->transition->advance_query
1733 * ao->length);
1734 curr_advance_target = (ao->transition->advance_target
1735 * ao->length);
1736 }
1737 }
1738 return;
1739 }
1740
1741 typedef enum {
1742 Alignment_RYO_TOKEN_STRING,
1743 /**/
1744 Alignment_RYO_TOKEN_ID, /* [QT] */
1745 Alignment_RYO_TOKEN_DEF, /* [QT] */
1746 Alignment_RYO_TOKEN_LEN, /* [QT] */
1747 Alignment_RYO_TOKEN_STRAND, /* [QT] */
1748 Alignment_RYO_TOKEN_SEQ, /* [QT] */
1749 Alignment_RYO_TOKEN_TYPE, /* [QT] */
1750 /**/
1751 Alignment_RYO_TOKEN_ALIGN_BEGIN, /* [QT] */
1752 Alignment_RYO_TOKEN_ALIGN_END, /* [QT] */
1753 Alignment_RYO_TOKEN_ALIGN_LEN, /* [QT] */
1754 Alignment_RYO_TOKEN_ALIGN_SEQ, /* [QT] */
1755 /**/
1756 Alignment_RYO_TOKEN_CODING_BEGIN, /* [QT] */
1757 Alignment_RYO_TOKEN_CODING_END, /* [QT] */
1758 Alignment_RYO_TOKEN_CODING_LEN, /* [QT] */
1759 Alignment_RYO_TOKEN_CODING_SEQ, /* [QT] */
1760 /**/
1761 Alignment_RYO_TOKEN_SCORE,
1762 Alignment_RYO_TOKEN_MODEL_NAME,
1763 Alignment_RYO_TOKEN_RANK,
1764 Alignment_RYO_TOKEN_PERCENT_ID,
1765 Alignment_RYO_TOKEN_PERCENT_SIMILARITY,
1766 Alignment_RYO_TOKEN_PERCENT_SELF,
1767 Alignment_RYO_TOKEN_GENE_ORIENTATION,
1768 Alignment_RYO_TOKEN_EQUIVALENCED_TOTAL,
1769 Alignment_RYO_TOKEN_EQUIVALENCED_IDENTCAL,
1770 Alignment_RYO_TOKEN_EQUIVALENCED_SIMILAR,
1771 Alignment_RYO_TOKEN_EQUIVALENCED_MISMATCHES,
1772 /**/
1773 Alignment_RYO_TOKEN_SUGAR_BLOCK,
1774 Alignment_RYO_TOKEN_CIGAR_BLOCK,
1775 Alignment_RYO_TOKEN_VULGAR_BLOCK,
1776 /**/
1777 Alignment_RYO_TOKEN_PTO_OPEN,
1778 Alignment_RYO_TOKEN_PTO_CLOSE,
1779 /**/
1780 Alignment_RYO_TOKEN_PTO_SEQ, /* [QT] */
1781 Alignment_RYO_TOKEN_PTO_ADVANCE, /* [QT] */
1782 Alignment_RYO_TOKEN_PTO_BEGIN, /* [QT] */
1783 Alignment_RYO_TOKEN_PTO_END, /* [QT] */
1784 /**/
1785 Alignment_RYO_TOKEN_PTO_NAME,
1786 Alignment_RYO_TOKEN_PTO_SCORE,
1787 Alignment_RYO_TOKEN_PTO_LABEL
1788 } Alignment_RYO_Token;
1789
1790 typedef struct {
1791 Alignment_RYO_Token token;
1792 GString *str; /* For TOKEN_STRING */
1793 gboolean on_query; /* For [QT] */
1794 } Alignment_RYO_ComplexToken;
1795
Alignment_RYO_ComplexToken_create(Alignment_RYO_Token token,gchar * str,gboolean on_query)1796 static Alignment_RYO_ComplexToken *Alignment_RYO_ComplexToken_create(
1797 Alignment_RYO_Token token, gchar *str, gboolean on_query){
1798 register Alignment_RYO_ComplexToken *rct
1799 = g_new(Alignment_RYO_ComplexToken, 1);
1800 rct->token = token;
1801 if(str)
1802 rct->str = g_string_new(str);
1803 else
1804 rct->str = NULL;
1805 rct->on_query = on_query;
1806 return rct;
1807 }
1808
Alignment_RYO_ComplexToken_destroy(Alignment_RYO_ComplexToken * rct)1809 static void Alignment_RYO_ComplexToken_destroy(
1810 Alignment_RYO_ComplexToken *rct){
1811 if(rct->str)
1812 g_string_free(rct->str, TRUE);
1813 g_free(rct);
1814 return;
1815 }
1816
Alignment_RYO_add_string(GPtrArray * token_list,gchar * str)1817 static void Alignment_RYO_add_string(GPtrArray *token_list, gchar *str){
1818 register Alignment_RYO_ComplexToken *rct = NULL;
1819 if(token_list->len){
1820 rct = token_list->pdata[token_list->len-1];
1821 if(rct->token != Alignment_RYO_TOKEN_STRING)
1822 rct = NULL;
1823 }
1824 if(rct){
1825 g_assert(rct->str);
1826 g_string_append(rct->str, str);
1827 } else {
1828 rct = Alignment_RYO_ComplexToken_create(
1829 Alignment_RYO_TOKEN_STRING, str, FALSE);
1830 g_ptr_array_add(token_list, rct);
1831 }
1832 return;
1833 }
1834
Alignment_RYO_add_token(GPtrArray * token_list,Alignment_RYO_Token token,gboolean on_query)1835 static void Alignment_RYO_add_token(GPtrArray *token_list,
1836 Alignment_RYO_Token token,
1837 gboolean on_query){
1838 register Alignment_RYO_ComplexToken *rct
1839 = Alignment_RYO_ComplexToken_create(token, NULL, on_query);
1840 g_ptr_array_add(token_list, rct);
1841 return;
1842 }
1843
Alignment_RYO_tokenise_strand(GPtrArray * token_list,gchar * format,gint pos,gboolean on_query)1844 static gint Alignment_RYO_tokenise_strand(GPtrArray *token_list,
1845 gchar *format, gint pos, gboolean on_query){
1846 register gint move = 1;
1847 switch(format[pos]){
1848 case 'i':
1849 Alignment_RYO_add_token(token_list,
1850 Alignment_RYO_TOKEN_ID, on_query);
1851 break;
1852 case 'd':
1853 Alignment_RYO_add_token(token_list,
1854 Alignment_RYO_TOKEN_DEF, on_query);
1855 break;
1856 case 'l':
1857 Alignment_RYO_add_token(token_list,
1858 Alignment_RYO_TOKEN_LEN, on_query);
1859 break;
1860 case 's':
1861 Alignment_RYO_add_token(token_list,
1862 Alignment_RYO_TOKEN_SEQ, on_query);
1863 break;
1864 case 'S':
1865 Alignment_RYO_add_token(token_list,
1866 Alignment_RYO_TOKEN_STRAND, on_query);
1867 break;
1868 case 't':
1869 Alignment_RYO_add_token(token_list,
1870 Alignment_RYO_TOKEN_TYPE, on_query);
1871 break;
1872 case 'a':
1873 switch(format[pos+1]){
1874 case 'b':
1875 Alignment_RYO_add_token(token_list,
1876 Alignment_RYO_TOKEN_ALIGN_BEGIN, on_query);
1877 break;
1878 case 'e':
1879 Alignment_RYO_add_token(token_list,
1880 Alignment_RYO_TOKEN_ALIGN_END, on_query);
1881 break;
1882 case 'l':
1883 Alignment_RYO_add_token(token_list,
1884 Alignment_RYO_TOKEN_ALIGN_LEN, on_query);
1885 break;
1886 case 's':
1887 Alignment_RYO_add_token(token_list,
1888 Alignment_RYO_TOKEN_ALIGN_SEQ, on_query);
1889 break;
1890 default:
1891 g_error("Unknown [%%%c%c%c] in format string [%s]",
1892 format[pos-1], format[pos], format[pos+1],
1893 format);
1894 break;
1895 }
1896 move++;
1897 break;
1898 case 'c':
1899 switch(format[pos+1]){
1900 case 'b':
1901 Alignment_RYO_add_token(token_list,
1902 Alignment_RYO_TOKEN_CODING_BEGIN, on_query);
1903 break;
1904 case 'e':
1905 Alignment_RYO_add_token(token_list,
1906 Alignment_RYO_TOKEN_CODING_END, on_query);
1907 break;
1908 case 'l':
1909 Alignment_RYO_add_token(token_list,
1910 Alignment_RYO_TOKEN_CODING_LEN, on_query);
1911 break;
1912 case 's':
1913 Alignment_RYO_add_token(token_list,
1914 Alignment_RYO_TOKEN_CODING_SEQ, on_query);
1915 break;
1916
1917 default:
1918 g_error("Unknown [%%%c%c%c] in format string [%s]",
1919 format[pos-1], format[pos], format[pos+1],
1920 format);
1921 break;
1922 }
1923 move++;
1924 break;
1925 default:
1926 g_error("Unknown [%%%c%c] in format string [%s]",
1927 format[pos-1], format[pos], format);
1928 break;
1929 }
1930 return move;
1931 }
1932
Alignment_RYO_tokenise_PTO_strand(GPtrArray * token_list,gchar * format,gint pos,gboolean on_query)1933 static gint Alignment_RYO_tokenise_PTO_strand(GPtrArray *token_list,
1934 gchar *format, gint pos, gboolean on_query){
1935 register gint move = 1;
1936 switch(format[pos]){
1937 case 's':
1938 Alignment_RYO_add_token(token_list,
1939 Alignment_RYO_TOKEN_PTO_SEQ, on_query);
1940 break;
1941 case 'a':
1942 Alignment_RYO_add_token(token_list,
1943 Alignment_RYO_TOKEN_PTO_ADVANCE, on_query);
1944 break;
1945 case 'b':
1946 Alignment_RYO_add_token(token_list,
1947 Alignment_RYO_TOKEN_PTO_BEGIN, on_query);
1948 break;
1949 case 'e':
1950 Alignment_RYO_add_token(token_list,
1951 Alignment_RYO_TOKEN_PTO_END, on_query);
1952 break;
1953 default:
1954 g_error("Unknown [%%%c%c%c] in format string [%s]",
1955 format[pos-2], format[pos-1], format[pos], format);
1956 break;
1957 }
1958 return move;
1959 }
1960
Alignment_RYO_tokenise_PTO(GPtrArray * token_list,gchar * format,gint pos)1961 static gint Alignment_RYO_tokenise_PTO(GPtrArray *token_list,
1962 gchar *format, gint pos){
1963 register gint move = 1;
1964 switch(format[pos]){
1965 case 'q':
1966 move += Alignment_RYO_tokenise_PTO_strand(token_list,
1967 format, pos+1, TRUE);
1968 break;
1969 case 't':
1970 move += Alignment_RYO_tokenise_PTO_strand(token_list,
1971 format, pos+1, FALSE);
1972 break;
1973 case 'n':
1974 Alignment_RYO_add_token(token_list,
1975 Alignment_RYO_TOKEN_PTO_NAME, FALSE);
1976 break;
1977 case 's':
1978 Alignment_RYO_add_token(token_list,
1979 Alignment_RYO_TOKEN_PTO_SCORE, FALSE);
1980 break;
1981 case 'l':
1982 Alignment_RYO_add_token(token_list,
1983 Alignment_RYO_TOKEN_PTO_LABEL, FALSE);
1984 break;
1985 default:
1986 g_error("Unknown [%%%c%c] in format string [%s]",
1987 format[pos-1], format[pos], format);
1988 break;
1989 }
1990 return move;
1991 }
1992
Alignment_RYO_tokenise(gchar * format)1993 static GPtrArray *Alignment_RYO_tokenise(gchar *format){
1994 register gint i;
1995 register GPtrArray *token_list = g_ptr_array_new();
1996 gchar mini_str[2];
1997 mini_str[1] = '\0';
1998 for(i = 0; format[i]; i++){
1999 switch(format[i]){
2000 case '\\':
2001 switch(format[i+1]){
2002 case '\\':
2003 Alignment_RYO_add_string(token_list, "\\");
2004 break;
2005 case 'n':
2006 Alignment_RYO_add_string(token_list, "\n");
2007 break;
2008 case 't':
2009 Alignment_RYO_add_string(token_list, "\t");
2010 break;
2011 case '{':
2012 Alignment_RYO_add_string(token_list, "{");
2013 break;
2014 case '}':
2015 Alignment_RYO_add_string(token_list, "}");
2016 break;
2017 default:
2018 g_error("Unknown [\\%c] in ryo string [%s]",
2019 format[i+1], format);
2020 break;
2021 }
2022 i++;
2023 break;
2024 case '%':
2025 switch(format[i+1]){
2026 case '%':
2027 Alignment_RYO_add_string(token_list, "%");
2028 break;
2029 case 'q':
2030 i += Alignment_RYO_tokenise_strand(token_list,
2031 format, i+2, TRUE);
2032 break;
2033 case 't':
2034 i += Alignment_RYO_tokenise_strand(token_list,
2035 format, i+2, FALSE);
2036 break;
2037 case 's':
2038 Alignment_RYO_add_token(token_list,
2039 Alignment_RYO_TOKEN_SCORE, FALSE);
2040 break;
2041 case 'm':
2042 Alignment_RYO_add_token(token_list,
2043 Alignment_RYO_TOKEN_MODEL_NAME, FALSE);
2044 break;
2045 case 'r':
2046 Alignment_RYO_add_token(token_list,
2047 Alignment_RYO_TOKEN_RANK, FALSE);
2048 break;
2049 case 'p':
2050 switch(format[i+2]){
2051 case 'i':
2052 Alignment_RYO_add_token(token_list,
2053 Alignment_RYO_TOKEN_PERCENT_ID, FALSE);
2054 break;
2055 case 's':
2056 Alignment_RYO_add_token(token_list,
2057 Alignment_RYO_TOKEN_PERCENT_SIMILARITY,
2058 FALSE);
2059 break;
2060 case 'S':
2061 Alignment_RYO_add_token(token_list,
2062 Alignment_RYO_TOKEN_PERCENT_SELF,
2063 FALSE);
2064 break;
2065 default:
2066 g_error(
2067 "Unknown [%%%c%c] in format string",
2068 format[i+1], format[i+2]);
2069 break;
2070 }
2071 i++;
2072 break;
2073 case 'e':
2074 switch(format[i+2]){
2075 case 't':
2076 Alignment_RYO_add_token(token_list,
2077 Alignment_RYO_TOKEN_EQUIVALENCED_TOTAL,
2078 FALSE);
2079 break;
2080 case 'i':
2081 Alignment_RYO_add_token(token_list,
2082 Alignment_RYO_TOKEN_EQUIVALENCED_IDENTCAL,
2083 FALSE);
2084 break;
2085 case 's':
2086 Alignment_RYO_add_token(token_list,
2087 Alignment_RYO_TOKEN_EQUIVALENCED_SIMILAR,
2088 FALSE);
2089 break;
2090 case 'm':
2091 Alignment_RYO_add_token(token_list,
2092 Alignment_RYO_TOKEN_EQUIVALENCED_MISMATCHES,
2093 FALSE);
2094 break;
2095 default:
2096 g_error(
2097 "Unknown [%%%c%c] in format string",
2098 format[i+1], format[i+2]);
2099 break;
2100 }
2101 i++;
2102 break;
2103 case 'g':
2104 Alignment_RYO_add_token(token_list,
2105 Alignment_RYO_TOKEN_GENE_ORIENTATION, FALSE);
2106 break;
2107 case 'S':
2108 Alignment_RYO_add_token(token_list,
2109 Alignment_RYO_TOKEN_SUGAR_BLOCK, FALSE);
2110 break;
2111 case 'C':
2112 Alignment_RYO_add_token(token_list,
2113 Alignment_RYO_TOKEN_CIGAR_BLOCK, FALSE);
2114 break;
2115 case 'V':
2116 Alignment_RYO_add_token(token_list,
2117 Alignment_RYO_TOKEN_VULGAR_BLOCK, FALSE);
2118 break;
2119 case 'P':
2120 i += Alignment_RYO_tokenise_PTO(token_list,
2121 format, i+2);
2122 break;
2123 default:
2124 g_error("Unknown [%%%c] in format string [%s]",
2125 format[i+1], format);
2126 break;
2127 }
2128 i++;
2129 break;
2130 case '{':
2131 Alignment_RYO_add_token(token_list,
2132 Alignment_RYO_TOKEN_PTO_OPEN, FALSE);
2133 break;
2134 case '}':
2135 Alignment_RYO_add_token(token_list,
2136 Alignment_RYO_TOKEN_PTO_CLOSE, FALSE);
2137 break;
2138 default:
2139 mini_str[0] = format[i];
2140 Alignment_RYO_add_string(token_list, mini_str);
2141 break;
2142 }
2143 }
2144 return token_list;
2145 }
2146 /* %[qt][idlsSt] {query,target}{id,def,len,seq,Strand,type]
2147 * %[qt]a[bels] {query,target}align{begin,end,len,seq}
2148 * %s score
2149 * %m model_name
2150 * %r rank
2151 * %p[isS] percent {id,similarity,self}
2152 * %e[tid] equivalenced {total,identical,similar}
2153 * %g gene_orientation
2154 * %S sugar block
2155 * %C cigar block
2156 * %V vulgar block
2157 * %% %
2158 * \n newline
2159 * \t tab
2160 * \\ \
2161 *
2162 * \{ {
2163 * \} }
2164 * { start per-transition section
2165 * } end per-transition section
2166 * %P[qt][sabe] per-transition-output{query,target}
2167 * {seq,advance,begin,end}
2168 * %P[nsl] per-transition-output{name,score,label}
2169 */
2170
Alignment_RYO_token_list_destroy(GPtrArray * token_list)2171 static void Alignment_RYO_token_list_destroy(GPtrArray *token_list){
2172 register gint i;
2173 register Alignment_RYO_ComplexToken *rct;
2174 for(i = 0; i < token_list->len; i++){
2175 rct = token_list->pdata[i];
2176 Alignment_RYO_ComplexToken_destroy(rct);
2177 }
2178 g_ptr_array_free(token_list, TRUE);
2179 return;
2180 }
2181
2182 /**/
2183
2184 typedef struct {
2185 Alignment *alignment;
2186 AlignmentOperation *ao;
2187 gint operation_id;
2188 gint operation_pos;
2189 gint query_pos;
2190 gint target_pos;
2191 C4_Score *cell;
2192 C4_Score *start_cell;
2193 C4_Score score;
2194 gpointer user_data; /* No ref stored */
2195 } Alignment_Position;
2196
Alignment_Position_create(Alignment * alignment,gpointer user_data)2197 static Alignment_Position *Alignment_Position_create(
2198 Alignment *alignment, gpointer user_data){
2199 register Alignment_Position *ap = g_new(Alignment_Position, 1);
2200 register gint i;
2201 register gint shadow_total
2202 = 1 + alignment->model->total_shadow_designations;
2203 register C4_Calc *calc;
2204 ap->alignment = Alignment_share(alignment);
2205 ap->ao = alignment->operation_list->pdata[0];
2206 ap->operation_id = 0;
2207 ap->operation_pos = 0;
2208 ap->query_pos = alignment->region->query_start;
2209 ap->target_pos = alignment->region->target_start;
2210 ap->cell = g_new0(C4_Score, shadow_total);
2211 ap->user_data = user_data;
2212 if(alignment->model->start_state->cell_start_func){
2213 ap->start_cell = alignment->model->start_state->cell_start_func
2214 (ap->query_pos, ap->target_pos, user_data);
2215 for(i = 0; i < shadow_total; i++)
2216 ap->cell[i] = ap->start_cell[i];
2217 }
2218 if(alignment->model->init_func)
2219 alignment->model->init_func(alignment->region, user_data);
2220 for(i = 0; i < alignment->model->calc_list->len; i++){
2221 calc = alignment->model->calc_list->pdata[i];
2222 if(calc && calc->init_func)
2223 calc->init_func(alignment->region, user_data);
2224 }
2225 return ap;
2226 }
2227
Alignment_Position_destroy(Alignment_Position * ap)2228 static void Alignment_Position_destroy(Alignment_Position *ap){
2229 register gint i;
2230 register C4_Calc *calc;
2231 for(i = 0; i < ap->alignment->model->calc_list->len; i++){
2232 calc = ap->alignment->model->calc_list->pdata[i];
2233 if(calc && calc->exit_func)
2234 calc->exit_func(ap->alignment->region, ap->user_data);
2235 }
2236 if(ap->alignment->model->exit_func)
2237 ap->alignment->model->exit_func(ap->alignment->region,
2238 ap->user_data);
2239 Alignment_destroy(ap->alignment);
2240 g_free(ap);
2241 return;
2242 }
2243
Alignment_Position_set_shadows(Alignment_Position * ap)2244 static void Alignment_Position_set_shadows(Alignment_Position *ap){
2245 register gint i;
2246 register C4_State *state = ap->ao->transition->input;
2247 register C4_Shadow *shadow;
2248 for(i = 0; i < state->src_shadow_list->len; i++){
2249 shadow = state->src_shadow_list->pdata[i];
2250 ap->cell[1 + shadow->designation] = shadow->start_func(
2251 ap->query_pos, ap->target_pos,
2252 ap->user_data);
2253 }
2254 for(i = 0; i < ap->ao->transition->dst_shadow_list->len; i++){
2255 shadow = ap->ao->transition->dst_shadow_list->pdata[i];
2256 shadow->end_func(ap->cell[1 + shadow->designation],
2257 ap->query_pos + ap->ao->transition->advance_query,
2258 ap->target_pos + ap->ao->transition->advance_target,
2259 ap->user_data);
2260 }
2261 return;
2262 }
2263
Alignment_Position_next(Alignment_Position * ap)2264 static gboolean Alignment_Position_next(Alignment_Position *ap){
2265 ap->query_pos += ap->ao->transition->advance_query;
2266 ap->target_pos += ap->ao->transition->advance_target;
2267 /* Use next transition */
2268 if(++ap->operation_pos < ap->ao->length){
2269 Alignment_Position_set_shadows(ap);
2270 return TRUE;
2271 }
2272 /* Use next operation */
2273 if(++ap->operation_id < ap->alignment->operation_list->len){
2274 ap->operation_pos = 0;
2275 ap->ao = ap->alignment->operation_list->pdata[ap->operation_id];
2276 Alignment_Position_set_shadows(ap);
2277 return TRUE;
2278 }
2279 return FALSE;
2280 }
2281
2282 /**/
2283
2284 typedef struct {
2285 gint begin;
2286 gint end;
2287 Sequence *seq;
2288 } Alignment_Coding;
2289
Alignment_Coding_create(Alignment * alignment,Sequence * query,Sequence * target,gpointer user_data,gboolean on_query)2290 static Alignment_Coding *Alignment_Coding_create(Alignment *alignment,
2291 Sequence *query,
2292 Sequence *target,
2293 gpointer user_data,
2294 gboolean on_query){
2295 register Alignment_Coding *ac = g_new(Alignment_Coding, 1);
2296 register Alignment_Position *ap
2297 = Alignment_Position_create(alignment, user_data);
2298 register Sequence *src = NULL;
2299 register GString *seq = g_string_sized_new(1024);
2300 register gint i, advance, pos;
2301 if(on_query){
2302 g_assert(query->alphabet->type == Alphabet_Type_DNA);
2303 src = query;
2304 } else {
2305 g_assert(target->alphabet->type == Alphabet_Type_DNA);
2306 src = target;
2307 }
2308 while(Alignment_Position_next(ap)){
2309 if(on_query){
2310 advance = ap->ao->transition->advance_query;
2311 pos = ap->query_pos;
2312 } else {
2313 advance = ap->ao->transition->advance_target;
2314 pos = ap->target_pos;
2315 }
2316 switch(ap->ao->transition->label){
2317 case C4_Label_MATCH:
2318 if(advance == 3){
2319 if(!seq->len)
2320 ac->begin = pos;
2321 g_string_append_c(seq, Sequence_get_symbol(src, pos));
2322 g_string_append_c(seq, Sequence_get_symbol(src, pos+1));
2323 g_string_append_c(seq, Sequence_get_symbol(src, pos+2));
2324 ac->end = pos;
2325 }
2326 break;
2327 case C4_Label_SPLIT_CODON:
2328 g_assert(advance);
2329 g_assert(seq);
2330 g_assert(seq->len);
2331 for(i = 0; i < advance; i++)
2332 g_string_append_c(seq, Sequence_get_symbol(src, pos+i));
2333 break;
2334 case C4_Label_GAP:
2335 if(advance == 3){
2336 g_assert(seq);
2337 g_assert(seq->len);
2338 g_string_append_c(seq, Sequence_get_symbol(src, pos));
2339 g_string_append_c(seq, Sequence_get_symbol(src, pos+1));
2340 g_string_append_c(seq, Sequence_get_symbol(src, pos+2));
2341 }
2342 break;
2343 default: /* ignored */
2344 break;
2345 }
2346 }
2347 Alignment_Position_destroy(ap);
2348 ac->seq = Sequence_create("coding", NULL, seq->str, seq->len,
2349 src->strand, src->alphabet);
2350 g_string_free(seq, TRUE);
2351 return ac;
2352 }
2353
Alignment_Coding_destroy(Alignment_Coding * ac)2354 static void Alignment_Coding_destroy(Alignment_Coding *ac){
2355 Sequence_destroy(ac->seq);
2356 g_free(ac);
2357 return;
2358 }
2359
2360 /**/
2361
Alignment_RYO_token_list_print(GPtrArray * token_list,Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gint rank,gpointer user_data,gpointer self_data,FILE * fp)2362 static void Alignment_RYO_token_list_print(GPtrArray *token_list,
2363 Alignment *alignment, Sequence *query, Sequence *target,
2364 Translate *translate, gint rank,
2365 gpointer user_data, gpointer self_data, FILE *fp){
2366 register gint i, j, pto_start = -1;
2367 register Alignment_RYO_ComplexToken *rct;
2368 register Sequence *seq, *subseq;
2369 register Alignment_Position *ap = NULL;
2370 register Alignment_Coding *qy_ac = NULL, *tg_ac = NULL, *ac;
2371 for(i = 0; i < token_list->len; i++){
2372 rct = token_list->pdata[i];
2373 seq = rct->on_query?query:target;
2374 switch(rct->token){
2375 case Alignment_RYO_TOKEN_STRING:
2376 fprintf(fp, "%s", rct->str->str);
2377 break;
2378 /**/
2379 case Alignment_RYO_TOKEN_ID:
2380 fprintf(fp, "%s", seq->id);
2381 break;
2382 case Alignment_RYO_TOKEN_DEF:
2383 if(seq->def)
2384 fprintf(fp, "%s", seq->def);
2385 break;
2386 case Alignment_RYO_TOKEN_LEN:
2387 fprintf(fp, "%d", seq->len);
2388 break;
2389 case Alignment_RYO_TOKEN_STRAND:
2390 fprintf(fp, "%c", Sequence_get_strand_as_char(seq));
2391 break;
2392 case Alignment_RYO_TOKEN_SEQ:
2393 Sequence_print_fasta_block(seq, fp);
2394 break;
2395 case Alignment_RYO_TOKEN_TYPE:
2396 fprintf(fp, "%s",
2397 Alphabet_Type_get_name(seq->alphabet->type));
2398 break;
2399 /**/
2400 case Alignment_RYO_TOKEN_ALIGN_BEGIN:
2401 fprintf(fp, "%d",
2402 Alignment_get_coordinate(alignment, query, target,
2403 rct->on_query, TRUE));
2404 break;
2405 case Alignment_RYO_TOKEN_ALIGN_END:
2406 fprintf(fp, "%d",
2407 Alignment_get_coordinate(alignment, query, target,
2408 rct->on_query, FALSE));
2409 break;
2410 case Alignment_RYO_TOKEN_ALIGN_LEN:
2411 fprintf(fp, "%d",
2412 rct->on_query?alignment->region->query_length
2413 :alignment->region->target_length);
2414 break;
2415 case Alignment_RYO_TOKEN_ALIGN_SEQ:
2416 if(rct->on_query){
2417 subseq = Sequence_subseq(seq,
2418 alignment->region->query_start,
2419 alignment->region->query_length);
2420 } else {
2421 subseq = Sequence_subseq(seq,
2422 alignment->region->target_start,
2423 alignment->region->target_length);
2424 }
2425 Sequence_print_fasta_block(subseq, fp);
2426 Sequence_destroy(subseq);
2427 break;
2428 /**/
2429 case Alignment_RYO_TOKEN_CODING_BEGIN:
2430 ac = rct->on_query?qy_ac:tg_ac;
2431 if(!ac)
2432 ac = Alignment_Coding_create(alignment,
2433 query, target, user_data, rct->on_query);
2434 fprintf(fp, "%d", Alignment_convert_coordinate(alignment,
2435 query, target,
2436 ac->begin, ac->begin, rct->on_query));
2437 break;
2438 case Alignment_RYO_TOKEN_CODING_END:
2439 ac = rct->on_query?qy_ac:tg_ac;
2440 if(!ac)
2441 ac = Alignment_Coding_create(alignment,
2442 query, target, user_data, rct->on_query);
2443 fprintf(fp, "%d", Alignment_convert_coordinate(alignment,
2444 query, target,
2445 ac->end, ac->end, rct->on_query));
2446 break;
2447 case Alignment_RYO_TOKEN_CODING_LEN:
2448 ac = rct->on_query?qy_ac:tg_ac;
2449 if(!ac)
2450 ac = Alignment_Coding_create(alignment,
2451 query, target, user_data, rct->on_query);
2452 fprintf(fp, "%d", ac->seq->len);
2453 break;
2454 case Alignment_RYO_TOKEN_CODING_SEQ:
2455 ac = rct->on_query?qy_ac:tg_ac;
2456 if(!ac)
2457 ac = Alignment_Coding_create(alignment,
2458 query, target, user_data, rct->on_query);
2459 Sequence_print_fasta_block(ac->seq, fp);
2460 break;
2461 /**/
2462 case Alignment_RYO_TOKEN_SCORE:
2463 fprintf(fp, "%d", alignment->score);
2464 break;
2465 case Alignment_RYO_TOKEN_MODEL_NAME:
2466 fprintf(fp, "%s", alignment->model->name);
2467 break;
2468 case Alignment_RYO_TOKEN_RANK:
2469 if(rank == -1)
2470 fprintf(fp, "%%_EXONERATE_BESTN_RANK_%%");
2471 else
2472 fprintf(fp, "%d", rank);
2473 break;
2474 /**/
2475 case Alignment_RYO_TOKEN_PERCENT_ID:
2476 fprintf(fp, "%2.2f", Alignment_get_percent_score(
2477 alignment, query, target,
2478 translate, TRUE, user_data));
2479 break;
2480 case Alignment_RYO_TOKEN_PERCENT_SIMILARITY:
2481 fprintf(fp, "%2.2f", Alignment_get_percent_score(
2482 alignment, query, target,
2483 translate, FALSE, user_data));
2484 break;
2485 case Alignment_RYO_TOKEN_PERCENT_SELF:
2486 fprintf(fp, "%2.2f", Alignment_get_percent_self(
2487 alignment, query, target,
2488 translate, user_data, self_data));
2489 break;
2490 case Alignment_RYO_TOKEN_GENE_ORIENTATION:
2491 fprintf(fp, "%c",
2492 Alignment_get_gene_orientation(alignment));
2493 break;
2494 /**/
2495 case Alignment_RYO_TOKEN_EQUIVALENCED_TOTAL:
2496 fprintf(fp, "%d", Alignment_get_equivalenced_total(
2497 alignment));
2498 break;
2499 case Alignment_RYO_TOKEN_EQUIVALENCED_IDENTCAL:
2500 fprintf(fp, "%d", Alignment_get_equivalenced_matching(
2501 alignment, query, target,
2502 translate, TRUE, user_data));
2503 break;
2504 case Alignment_RYO_TOKEN_EQUIVALENCED_SIMILAR:
2505 fprintf(fp, "%d", Alignment_get_equivalenced_matching(
2506 alignment, query, target,
2507 translate, FALSE, user_data));
2508 break;
2509 case Alignment_RYO_TOKEN_EQUIVALENCED_MISMATCHES:
2510 fprintf(fp, "%d",
2511 Alignment_get_equivalenced_total(alignment)
2512 - Alignment_get_equivalenced_matching(
2513 alignment, query, target,
2514 translate, TRUE, user_data));
2515 break;
2516 /**/
2517 case Alignment_RYO_TOKEN_SUGAR_BLOCK:
2518 Alignment_print_sugar_block(alignment, query, target, fp);
2519 break;
2520 case Alignment_RYO_TOKEN_CIGAR_BLOCK:
2521 Alignment_print_cigar_block(alignment, query, target, fp);
2522 break;
2523 case Alignment_RYO_TOKEN_VULGAR_BLOCK:
2524 Alignment_print_vulgar_block(alignment, query, target, fp);
2525 break;
2526 /**/
2527 case Alignment_RYO_TOKEN_PTO_OPEN:
2528 if(pto_start != -1)
2529 g_error("Cannot nest PTO brackets");
2530 pto_start = i;
2531 ap = Alignment_Position_create(alignment, user_data);
2532 break;
2533 case Alignment_RYO_TOKEN_PTO_CLOSE:
2534 if(pto_start == -1)
2535 g_error("No opening PTO bracket in --ryo string");
2536 if(Alignment_Position_next(ap)){
2537 i = pto_start;
2538 } else {
2539 pto_start = -1;
2540 Alignment_Position_destroy(ap);
2541 ap = NULL;
2542 }
2543 break;
2544 /**/
2545 case Alignment_RYO_TOKEN_PTO_SEQ:
2546 g_assert(pto_start != -1);
2547 if(rct->on_query){
2548 if(ap->ao->transition->advance_query){
2549 for(j = 0; j < ap->ao->transition->advance_query; j++)
2550 fprintf(fp, "%c",
2551 Sequence_get_symbol(query, ap->query_pos+j));
2552 } else {
2553 fprintf(fp, "-");
2554 }
2555 } else {
2556 if(ap->ao->transition->advance_target){
2557 for(j = 0; j < ap->ao->transition->advance_target; j++)
2558 fprintf(fp, "%c",
2559 Sequence_get_symbol(target, ap->target_pos+j));
2560 } else {
2561 fprintf(fp, "-");
2562 }
2563 }
2564 break;
2565 case Alignment_RYO_TOKEN_PTO_ADVANCE:
2566 fprintf(fp, "%d",
2567 rct->on_query?ap->ao->transition->advance_query
2568 :ap->ao->transition->advance_target);
2569 break;
2570 case Alignment_RYO_TOKEN_PTO_BEGIN:
2571 fprintf(fp, "%d", Alignment_convert_coordinate(alignment,
2572 query, target,
2573 ap->query_pos, ap->target_pos, rct->on_query));
2574 break;
2575 case Alignment_RYO_TOKEN_PTO_END:
2576 fprintf(fp, "%d", Alignment_convert_coordinate(alignment,
2577 query, target,
2578 ap->query_pos+ap->ao->transition->advance_query,
2579 ap->target_pos+ap->ao->transition->advance_target,
2580 rct->on_query));
2581 break;
2582 /**/
2583 case Alignment_RYO_TOKEN_PTO_NAME:
2584 fprintf(fp, "%s", ap->ao->transition->name);
2585 break;
2586 case Alignment_RYO_TOKEN_PTO_SCORE:
2587 fprintf(fp, "%d",
2588 C4_Calc_score(ap->ao->transition->calc,
2589 ap->query_pos,
2590 ap->target_pos, user_data));
2591 break;
2592 case Alignment_RYO_TOKEN_PTO_LABEL:
2593 fprintf(fp, "%s",
2594 C4_Label_get_name(ap->ao->transition->label));
2595 break;
2596 default:
2597 g_error("Unknown token [%d]", rct->token);
2598 break;
2599 }
2600 }
2601 if(pto_start != -1)
2602 g_error("No closing PTO bracket in --ryo string");
2603 if(qy_ac)
2604 Alignment_Coding_destroy(qy_ac);
2605 if(tg_ac)
2606 Alignment_Coding_destroy(tg_ac);
2607 return;
2608 }
2609
Alignment_display_ryo(Alignment * alignment,Sequence * query,Sequence * target,gchar * format,Translate * translate,gint rank,gpointer user_data,gpointer self_data,FILE * fp)2610 void Alignment_display_ryo(Alignment *alignment,
2611 Sequence *query, Sequence *target, gchar *format,
2612 Translate *translate, gint rank,
2613 gpointer user_data, gpointer self_data, FILE *fp){
2614 register GPtrArray *token_list = Alignment_RYO_tokenise(format);
2615 Alignment_RYO_token_list_print(token_list, alignment,
2616 query, target, translate, rank,
2617 user_data, self_data, fp);
2618 Alignment_RYO_token_list_destroy(token_list);
2619 return;
2620 }
2621
Alignment_display_sugar(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)2622 void Alignment_display_sugar(Alignment *alignment,
2623 Sequence *query, Sequence *target, FILE *fp){
2624 fprintf(fp, "sugar: ");
2625 Alignment_print_sugar_block(alignment, query, target, fp);
2626 fprintf(fp, "\n");
2627 return;
2628 }
2629 /* sugar: simple ungapped alignment report
2630 */
2631
Alignment_display_cigar(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)2632 void Alignment_display_cigar(Alignment *alignment,
2633 Sequence *query, Sequence *target, FILE *fp){
2634 fprintf(fp, "cigar: ");
2635 Alignment_print_sugar_block(alignment, query, target, fp);
2636 fprintf(fp, " ");
2637 Alignment_print_cigar_block(alignment, query, target, fp);
2638 fprintf(fp, "\n");
2639 return;
2640 }
2641 /* cigar: concise idiosyncratic gapped alignment report
2642 * query_id query_start query_end query_strand \
2643 * target_id target_start target_end target_strand \
2644 * score \
2645 * <[MID] length> (m=Match i=Insert d=Delete)
2646 *
2647 */
2648
Alignment_display_vulgar(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)2649 void Alignment_display_vulgar(Alignment *alignment,
2650 Sequence *query, Sequence *target, FILE *fp){
2651 fprintf(fp, "vulgar: ");
2652 Alignment_print_sugar_block(alignment, query, target, fp);
2653 fprintf(fp, " ");
2654 Alignment_print_vulgar_block(alignment, query, target, fp);
2655 fprintf(fp, "\n");
2656 return;
2657 }
2658
2659 /**/
2660
Alignment_display_gff_header(Alignment * alignment,Sequence * query,Sequence * target,gboolean report_on_query,FILE * fp)2661 static void Alignment_display_gff_header(Alignment *alignment,
2662 Sequence *query, Sequence *target, gboolean report_on_query, FILE *fp){
2663 time_t timenow = time(NULL);
2664 register struct tm *tm_ptr = localtime(&timenow);
2665 gchar curr_time_str[12];
2666 strftime(curr_time_str, 30, "%Y-%m-%d", tm_ptr);
2667 fprintf(fp, "#\n"
2668 "##gff-version 2\n"
2669 "##source-version %s:%s %s\n"
2670 "##date %s\n"
2671 "##type %s\n"
2672 "#\n",
2673 PACKAGE, alignment->model->name, VERSION,
2674 curr_time_str,
2675 Alphabet_Type_get_name(report_on_query
2676 ? query->alphabet->type
2677 : target->alphabet->type));
2678 fprintf(fp, "#\n# seqname source feature start end"
2679 " score strand frame attributes\n#\n");
2680 return;
2681 }
2682
Alignment_display_gff_line(Alignment * alignment,Sequence * query,Sequence * target,gboolean report_on_query,gchar * feature,gint query_start,gint target_start,gint query_end,gint target_end,gboolean show_score,gint score,gboolean show_frame,gint frame,GPtrArray * attribute_list,FILE * fp)2683 static void Alignment_display_gff_line(Alignment *alignment,
2684 Sequence *query,
2685 Sequence *target,
2686 gboolean report_on_query,
2687 gchar *feature,
2688 gint query_start,
2689 gint target_start,
2690 gint query_end,
2691 gint target_end,
2692 gboolean show_score, gint score,
2693 gboolean show_frame, gint frame,
2694 GPtrArray *attribute_list, FILE *fp){
2695 /* <seqname> <source> <feature> <start> <end> <score> <strand> <frame>
2696 * [attributes] [comments]
2697 */
2698 register Sequence *seq;
2699 register gint i, start, end, t;
2700 register gchar *attribute;
2701 if(report_on_query){
2702 seq = query;
2703 start = query_start;
2704 end = query_end;
2705 } else {
2706 seq = target;
2707 start = target_start;
2708 end = target_end;
2709 }
2710 if(seq->strand == Sequence_Strand_REVCOMP){
2711 start = seq->len - start;
2712 end = seq->len - end;
2713 /* swap coords for gff output */
2714 t = start;
2715 start = end;
2716 end = t;
2717 }
2718 /* Sanity checks for valid GFF output */
2719 g_assert(start >= 0);
2720 g_assert(end <= seq->len);
2721 g_assert(start < end);
2722 g_assert((!show_frame) || ((frame >= 0) && (frame <= 2)));
2723 fprintf(fp, "%s\t%s:%s\t%s\t%d\t%d\t",
2724 seq->id, PACKAGE, alignment->model->name, feature, start+1, end);
2725 if(show_score)
2726 fprintf(fp, "%d", score);
2727 else
2728 fprintf(fp, ".");
2729 fprintf(fp, "\t%c\t", Sequence_get_strand_as_char(seq));
2730 if(show_frame)
2731 fprintf(fp, "%d", frame);
2732 else
2733 fprintf(fp, ".");
2734 fprintf(fp, "\t");
2735 if(attribute_list){
2736 for(i = 0; i < attribute_list->len; i++){
2737 attribute = attribute_list->pdata[i];
2738 fprintf(fp, "%s", attribute);
2739 if((i+1) < attribute_list->len)
2740 fprintf(fp, " ; ");
2741 }
2742 }
2743 fprintf(fp, "\n");
2744 return;
2745 }
2746
Alignment_free_attribute_list(GPtrArray * attribute_list)2747 static void Alignment_free_attribute_list(GPtrArray *attribute_list){
2748 register gint i;
2749 for(i = 0; i < attribute_list->len; i++)
2750 g_free(attribute_list->pdata[i]);
2751 g_ptr_array_free(attribute_list, TRUE);
2752 return;
2753 }
2754
Alignment_display_gff_exon(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_on_query,gint query_pos,gint target_pos,gint exon_query_start,gint exon_target_start,gint exon_query_gap,gint exon_target_gap,gint exon_query_frameshift,gint exon_target_frameshift,gpointer user_data,FILE * fp)2755 static void Alignment_display_gff_exon(Alignment *alignment,
2756 Sequence *query,
2757 Sequence *target,
2758 Translate *translate,
2759 gboolean report_on_query,
2760 gint query_pos,
2761 gint target_pos,
2762 gint exon_query_start,
2763 gint exon_target_start,
2764 gint exon_query_gap,
2765 gint exon_target_gap,
2766 gint exon_query_frameshift,
2767 gint exon_target_frameshift,
2768 gpointer user_data, FILE *fp){
2769 register GPtrArray *attribute_list = g_ptr_array_new();
2770 g_ptr_array_add(attribute_list,
2771 g_strdup_printf("insertions %d",
2772 report_on_query?exon_query_gap
2773 :exon_target_gap));
2774 g_ptr_array_add(attribute_list,
2775 g_strdup_printf("deletions %d",
2776 report_on_query?exon_target_gap
2777 :exon_query_gap));
2778 g_ptr_array_add(attribute_list,
2779 g_strdup_printf("identity %2.2f",
2780 Alignment_get_percent_score_region(alignment,
2781 query, target, translate, TRUE, user_data,
2782 exon_query_start, query_pos)));
2783 g_ptr_array_add(attribute_list,
2784 g_strdup_printf("similarity %2.2f",
2785 Alignment_get_percent_score_region(alignment,
2786 query, target, translate, FALSE, user_data,
2787 exon_query_start, query_pos)));
2788 if(report_on_query){
2789 if(exon_query_frameshift)
2790 g_ptr_array_add(attribute_list,
2791 g_strdup_printf("frameshifts %d",
2792 exon_query_frameshift));
2793 } else {
2794 if(exon_target_frameshift)
2795 g_ptr_array_add(attribute_list,
2796 g_strdup_printf("frameshifts %d",
2797 exon_target_frameshift));
2798 }
2799 Alignment_display_gff_line(alignment, query, target,
2800 report_on_query, "exon",
2801 exon_query_start, exon_target_start,
2802 query_pos, target_pos,
2803 FALSE, 0, FALSE, 0, attribute_list, fp);
2804 Alignment_free_attribute_list(attribute_list);
2805 return;
2806 }
2807
Alignment_display_gff_utr(Alignment * alignment,Sequence * query,Sequence * target,gboolean report_on_query,gboolean post_cds,gint cds_query_start,gint cds_target_start,gint cds_query_end,gint cds_target_end,gint exon_query_start,gint exon_target_start,gint query_pos,gint target_pos,FILE * fp)2808 static void Alignment_display_gff_utr(Alignment *alignment,
2809 Sequence *query, Sequence *target,
2810 gboolean report_on_query, gboolean post_cds,
2811 gint cds_query_start, gint cds_target_start,
2812 gint cds_query_end, gint cds_target_end,
2813 gint exon_query_start, gint exon_target_start,
2814 gint query_pos, gint target_pos, FILE *fp){
2815 register gint curr_cds_query_start, curr_cds_target_start,
2816 curr_utr_query_start, curr_utr_target_start;
2817 if(post_cds){
2818 curr_utr_query_start = MAX(exon_query_start, cds_query_end);
2819 curr_utr_target_start = MAX(exon_target_start, cds_target_end);
2820 Alignment_display_gff_line(alignment, query, target,
2821 report_on_query, "utr3",
2822 curr_utr_query_start, curr_utr_target_start,
2823 query_pos, target_pos,
2824 FALSE, 0, FALSE, 0, NULL, fp);
2825 } else if(cds_query_start == -1){
2826 Alignment_display_gff_line(alignment, query, target,
2827 report_on_query, "utr5",
2828 exon_query_start, exon_target_start,
2829 query_pos, target_pos,
2830 FALSE, 0, FALSE, 0, NULL, fp);
2831 } else {
2832 curr_cds_query_start = MAX(cds_query_start,
2833 exon_query_start);
2834 curr_cds_target_start = MAX(cds_target_start,
2835 exon_target_start);
2836 Alignment_display_gff_line(alignment, query, target,
2837 report_on_query, "cds",
2838 curr_cds_query_start, curr_cds_target_start,
2839 query_pos, target_pos,
2840 FALSE, 0, FALSE, 0, NULL, fp);
2841 }
2842 return;
2843 }
2844
Alignment_display_gff_gene(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_on_query,gint result_id,gpointer user_data,FILE * fp)2845 static void Alignment_display_gff_gene(Alignment *alignment,
2846 Sequence *query, Sequence *target, Translate *translate,
2847 gboolean report_on_query,
2848 gint result_id, gpointer user_data, FILE *fp){
2849 register gint i;
2850 register gint query_pos = alignment->region->query_start,
2851 target_pos = alignment->region->target_start;
2852 register gint intron_id = 0, intron_length = 0;
2853 register gint exon_query_start = 0, exon_target_start = 0;
2854 register gint exon_query_gap = 0, exon_target_gap = 0;
2855 register gint exon_query_frameshift = 0, exon_target_frameshift = 0;
2856 register gint cds_query_start = -1, cds_target_start = -1,
2857 cds_query_end = -1, cds_target_end = -1;
2858 register gboolean in_exon = FALSE, post_cds = FALSE;
2859 register AlignmentOperation *ao;
2860 register gchar gene_orientation
2861 = Alignment_get_gene_orientation(alignment);
2862 register GPtrArray *attribute_list = g_ptr_array_new();
2863 register gint curr_utr_query_start, curr_utr_target_start;
2864 /**/
2865 g_ptr_array_add(attribute_list,
2866 g_strdup_printf("gene_id %d", result_id));
2867 g_ptr_array_add(attribute_list,
2868 g_strdup_printf("sequence %s",
2869 report_on_query?target->id:query->id));
2870 g_ptr_array_add(attribute_list,
2871 g_strdup_printf("gene_orientation %c", gene_orientation));
2872 g_ptr_array_add(attribute_list,
2873 g_strdup_printf("identity %2.2f",
2874 Alignment_get_percent_score(alignment,
2875 query, target, translate, TRUE, user_data)));
2876 g_ptr_array_add(attribute_list,
2877 g_strdup_printf("similarity %2.2f",
2878 Alignment_get_percent_score(alignment,
2879 query, target, translate, FALSE, user_data)));
2880 Alignment_display_gff_line(alignment, query, target, report_on_query,
2881 "gene",
2882 alignment->region->query_start,
2883 alignment->region->target_start,
2884 Region_query_end(alignment->region),
2885 Region_target_end(alignment->region),
2886 TRUE, alignment->score,
2887 FALSE, 0, attribute_list, fp);
2888 Alignment_free_attribute_list(attribute_list);
2889 for(i = 1; i < alignment->operation_list->len; i++){
2890 ao = alignment->operation_list->pdata[i];
2891 switch(ao->transition->label){
2892 case C4_Label_MATCH:
2893 if((ao->transition->advance_query == 1)
2894 && (ao->transition->advance_target == 1)){
2895 if((cds_query_start != -1) && (!post_cds)){
2896 Alignment_display_gff_line(alignment, query, target,
2897 report_on_query, "cds",
2898 exon_query_start, exon_target_start,
2899 query_pos, target_pos,
2900 FALSE, 0, FALSE, 0, NULL, fp);
2901 post_cds = TRUE;
2902 }
2903 } else {
2904 if(cds_query_start == -1){ /* First coding exon */
2905 if(in_exon){ /* Have UTR5 */
2906 Alignment_display_gff_line(alignment, query, target,
2907 report_on_query, "utr5",
2908 exon_query_start, exon_target_start,
2909 query_pos, target_pos,
2910 FALSE, 0, FALSE, 0, NULL, fp);
2911 }
2912 cds_query_start = query_pos;
2913 cds_target_start = target_pos;
2914 }
2915 cds_query_end = query_pos
2916 + (ao->transition->advance_query
2917 * ao->length);
2918 cds_target_end = target_pos
2919 + (ao->transition->advance_target
2920 * ao->length);
2921 }
2922 /*fallthrough*/
2923 case C4_Label_SPLIT_CODON:
2924 if(!in_exon){
2925 exon_query_start = query_pos;
2926 exon_target_start = target_pos;
2927 exon_query_gap = 0;
2928 exon_target_gap = 0;
2929 exon_query_frameshift = 0;
2930 exon_target_frameshift = 0;
2931 in_exon = TRUE;
2932 }
2933 break;
2934 case C4_Label_NONE:
2935 break;
2936 case C4_Label_GAP:
2937 exon_query_gap += (ao->transition->advance_query
2938 * ao->length);
2939 exon_target_gap += (ao->transition->advance_target
2940 * ao->length);
2941 break;
2942 case C4_Label_5SS:
2943 if(report_on_query){
2944 g_assert(ao->transition->advance_query == 2);
2945 g_assert(ao->transition->advance_target == 0);
2946 } else {
2947 g_assert(ao->transition->advance_query == 0);
2948 g_assert(ao->transition->advance_target == 2);
2949 }
2950 if(in_exon){
2951 Alignment_display_gff_utr(alignment,
2952 query, target, report_on_query, post_cds,
2953 cds_query_start, cds_target_start,
2954 cds_query_end, cds_target_end,
2955 exon_query_start, exon_target_start,
2956 query_pos, target_pos, fp);
2957 Alignment_display_gff_exon(alignment,
2958 query, target, translate, report_on_query,
2959 query_pos, target_pos,
2960 exon_query_start, exon_target_start,
2961 exon_query_gap, exon_target_gap,
2962 exon_query_frameshift,
2963 exon_target_frameshift, user_data, fp);
2964 in_exon = FALSE;
2965 }
2966 attribute_list = g_ptr_array_new();
2967 g_ptr_array_add(attribute_list,
2968 g_strdup_printf("intron_id %d", intron_id+1));
2969 g_ptr_array_add(attribute_list,
2970 g_strdup_printf("splice_site \"%c%c\"",
2971 report_on_query?Sequence_get_symbol(query, query_pos)
2972 :Sequence_get_symbol(target, target_pos),
2973 report_on_query?Sequence_get_symbol(query, query_pos+1)
2974 :Sequence_get_symbol(target, target_pos+1)));
2975 Alignment_display_gff_line(alignment, query, target,
2976 report_on_query, "splice5",
2977 query_pos, target_pos,
2978 query_pos+2, target_pos+2,
2979 FALSE, 0, FALSE, 0, attribute_list, fp);
2980 Alignment_free_attribute_list(attribute_list);
2981 intron_length = 0;
2982 break;
2983 case C4_Label_3SS:
2984 if(report_on_query){
2985 g_assert(ao->transition->advance_query == 2);
2986 g_assert(ao->transition->advance_target == 0);
2987 } else {
2988 g_assert(ao->transition->advance_query == 0);
2989 g_assert(ao->transition->advance_target == 2);
2990 }
2991 if(in_exon){
2992 Alignment_display_gff_utr(alignment,
2993 query, target, report_on_query, post_cds,
2994 cds_query_start, cds_target_start,
2995 cds_query_end, cds_target_end,
2996 exon_query_start, exon_target_start,
2997 query_pos, target_pos, fp);
2998 Alignment_display_gff_exon(alignment,
2999 query, target, translate, report_on_query,
3000 query_pos, target_pos,
3001 exon_query_start, exon_target_start,
3002 exon_query_gap, exon_target_gap,
3003 exon_query_frameshift,
3004 exon_target_frameshift, user_data, fp);
3005 in_exon = FALSE;
3006 }
3007 if(gene_orientation == '+'){
3008 attribute_list = g_ptr_array_new();
3009 g_ptr_array_add(attribute_list,
3010 g_strdup_printf("intron_id %d", ++intron_id));
3011 Alignment_display_gff_line(alignment, query, target,
3012 report_on_query, "intron",
3013 query_pos-intron_length-2,
3014 target_pos-intron_length-2,
3015 query_pos+2, target_pos+2,
3016 FALSE, 0, FALSE, 0, attribute_list, fp);
3017 Alignment_free_attribute_list(attribute_list);
3018 }
3019 attribute_list = g_ptr_array_new();
3020 g_ptr_array_add(attribute_list,
3021 g_strdup_printf("intron_id %d", intron_id-1));
3022 g_ptr_array_add(attribute_list,
3023 g_strdup_printf("splice_site \"%c%c\"",
3024 report_on_query?Sequence_get_symbol(query, query_pos)
3025 :Sequence_get_symbol(target, target_pos),
3026 report_on_query?Sequence_get_symbol(query, query_pos+1)
3027 :Sequence_get_symbol(target, target_pos+1)));
3028 Alignment_display_gff_line(alignment, query, target,
3029 report_on_query, "splice3",
3030 query_pos, target_pos,
3031 query_pos+2, target_pos+2,
3032 FALSE, 0, FALSE, 0, attribute_list, fp);
3033 Alignment_free_attribute_list(attribute_list);
3034 intron_length = 0;
3035 break;
3036 case C4_Label_INTRON:
3037 if(report_on_query){
3038 g_assert(ao->transition->advance_query == 1);
3039 g_assert(ao->transition->advance_target == 0);
3040 } else {
3041 g_assert(ao->transition->advance_query == 0);
3042 g_assert(ao->transition->advance_target == 1);
3043 }
3044 intron_length += ao->length;
3045 break;
3046 case C4_Label_FRAMESHIFT:
3047 exon_query_frameshift
3048 += (ao->transition->advance_query * ao->length);
3049 exon_target_frameshift
3050 += (ao->transition->advance_target * ao->length);
3051 break;
3052 case C4_Label_NER:
3053 g_error("Unexpected NER for gff gene output");
3054 break;
3055 default:
3056 g_error("Unknown C4_Label [%s]",
3057 C4_Label_get_name(ao->transition->label));
3058 break;
3059 }
3060 query_pos += (ao->transition->advance_query * ao->length);
3061 target_pos += (ao->transition->advance_target * ao->length);
3062 }
3063 if(in_exon){
3064 if(cds_query_end != -1){
3065 if(cds_query_end != query_pos){ /* Have 3' UTR */
3066 curr_utr_query_start = MAX(exon_query_start, cds_query_end);
3067 curr_utr_target_start = MAX(exon_target_start, cds_target_end);
3068 Alignment_display_gff_line(alignment, query, target,
3069 report_on_query, "utr3b",
3070 curr_utr_query_start,
3071 curr_utr_target_start,
3072 query_pos, target_pos,
3073 FALSE, 0, FALSE, 0, NULL, fp);
3074 } else {
3075 Alignment_display_gff_line(alignment, query, target,
3076 report_on_query, "cds",
3077 exon_query_start, exon_target_start,
3078 query_pos, target_pos,
3079 FALSE, 0, FALSE, 0, NULL, fp);
3080 }
3081 }
3082 Alignment_display_gff_exon(alignment,
3083 query, target, translate, report_on_query,
3084 query_pos, target_pos,
3085 exon_query_start, exon_target_start,
3086 exon_query_gap, exon_target_gap,
3087 exon_query_frameshift, exon_target_frameshift, user_data, fp);
3088 }
3089 return;
3090 }
3091
Alignment_display_gff_similarity(Alignment * alignment,Sequence * query,Sequence * target,gboolean report_on_query,gint result_id,FILE * fp)3092 static void Alignment_display_gff_similarity(Alignment *alignment,
3093 Sequence *query, Sequence *target, gboolean report_on_query,
3094 gint result_id, FILE *fp){
3095 register gint i, qp, tp;
3096 register gint query_pos = alignment->region->query_start,
3097 target_pos = alignment->region->target_start;
3098 register AlignmentOperation *ao;
3099 register GPtrArray *attribute_list = g_ptr_array_new();
3100 g_ptr_array_add(attribute_list,
3101 g_strdup_printf("alignment_id %d", result_id));
3102 if(report_on_query)
3103 g_ptr_array_add(attribute_list,
3104 g_strdup_printf("Target %s", target->id));
3105 else
3106 g_ptr_array_add(attribute_list,
3107 g_strdup_printf("Query %s", query->id));
3108 /* Align <seq_start> <target_start> [<length>] ; */
3109 for(i = 1; i < alignment->operation_list->len; i++){
3110 ao = alignment->operation_list->pdata[i];
3111 switch(ao->transition->label){
3112 case C4_Label_MATCH:
3113 qp = query_pos;
3114 tp = target_pos;
3115 if(query->strand == Sequence_Strand_REVCOMP)
3116 qp = query->len - qp;
3117 if(target->strand == Sequence_Strand_REVCOMP)
3118 tp = target->len - tp;
3119 if(report_on_query){
3120 g_ptr_array_add(attribute_list,
3121 g_strdup_printf("Align %d %d %d", qp+1, tp+1,
3122 ao->length*ao->transition->advance_query));
3123 } else {
3124 g_ptr_array_add(attribute_list,
3125 g_strdup_printf("Align %d %d %d", tp+1, qp+1,
3126 ao->length*ao->transition->advance_target));
3127 }
3128 break;
3129 case C4_Label_NONE:
3130 case C4_Label_GAP:
3131 case C4_Label_NER:
3132 case C4_Label_5SS:
3133 case C4_Label_3SS:
3134 case C4_Label_INTRON:
3135 case C4_Label_SPLIT_CODON:
3136 case C4_Label_FRAMESHIFT:
3137 break;
3138 default:
3139 g_error("Unknown C4_Label [%s]",
3140 C4_Label_get_name(ao->transition->label));
3141 break;
3142 }
3143 query_pos += (ao->transition->advance_query * ao->length);
3144 target_pos += (ao->transition->advance_target * ao->length);
3145 }
3146 Alignment_display_gff_line(alignment, query, target, report_on_query,
3147 "similarity",
3148 alignment->region->query_start,
3149 alignment->region->target_start,
3150 Region_query_end(alignment->region),
3151 Region_target_end(alignment->region),
3152 TRUE, alignment->score,
3153 FALSE, 0, attribute_list, fp);
3154 Alignment_free_attribute_list(attribute_list);
3155 return;
3156 }
3157
3158 /**/
3159
Alignment_display_gff(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_on_query,gboolean report_on_genomic,gint result_id,gpointer user_data,FILE * fp)3160 void Alignment_display_gff(Alignment *alignment,
3161 Sequence *query, Sequence *target,
3162 Translate *translate,
3163 gboolean report_on_query,
3164 gboolean report_on_genomic,
3165 gint result_id, gpointer user_data, FILE *fp){
3166 fprintf(fp, "# --- START OF GFF DUMP ---\n#\n");
3167 Alignment_display_gff_header(alignment, query, target,
3168 report_on_query, fp);
3169 if(report_on_genomic){
3170 Alignment_display_gff_gene(alignment, query, target, translate,
3171 report_on_query, result_id, user_data, fp);
3172 }
3173 Alignment_display_gff_similarity(alignment, query, target,
3174 report_on_query, result_id, fp);
3175 fprintf(fp, "# --- END OF GFF DUMP ---\n#\n");
3176 return;
3177 }
3178
3179 /**/
3180
3181 /*
3182 features: gene,intron,exon,splice[35],similarity
3183 */
3184
3185 /**/
3186
3187 #ifndef G_DISABLE_ASSERT
Alignment_has_valid_alignment(Alignment * alignment,gpointer user_data)3188 static gboolean Alignment_has_valid_alignment(Alignment *alignment,
3189 gpointer user_data){
3190 register gint query_pos = alignment->region->query_start,
3191 target_pos = alignment->region->target_start;
3192 register C4_Score score;
3193 register C4_Calc *calc;
3194 register AlignmentOperation *alignment_operation;
3195 register C4_State *state;
3196 register C4_Transition *transition;
3197 register C4_Shadow *shadow;
3198 register gint i, j, k, t;
3199 register C4_Score *start_cell, *cell
3200 = g_new0(C4_Score, 1+alignment->model->total_shadow_designations);
3201 /* FIXME: temp */
3202 /* #define DEBUG */
3203 #ifdef DEBUG
3204 g_message("Check model [%s] align score [%d]", alignment->model->name,
3205 alignment->score);
3206 for(i = 0; i < alignment->model->transition_list->len; i++){
3207 transition = alignment->model->transition_list->pdata[i];
3208 g_message("Check transition (%d) [%s] has dst_shad[%d] (%s:%s)",
3209 i, transition->name, transition->dst_shadow_list->len,
3210 transition->input->name, transition->output->name);
3211 }
3212 Region_print(alignment->region, "Alignment_has_valid_alignment");
3213 #endif /* DEBUG */
3214 if(alignment->model->init_func)
3215 alignment->model->init_func(alignment->region, user_data);
3216 for(i = 0; i < alignment->model->calc_list->len; i++){
3217 calc = alignment->model->calc_list->pdata[i];
3218 if(calc && calc->init_func)
3219 calc->init_func(alignment->region, user_data);
3220 }
3221 if(alignment->model->start_state->cell_start_func){
3222 start_cell = alignment->model->start_state->cell_start_func
3223 (query_pos, target_pos, user_data);
3224 #ifdef DEBUG
3225 g_print("USING start cell from (%d,%d): ",
3226 query_pos, target_pos);
3227 #endif /* DEBUG */
3228 for(i = 0;
3229 i < (1 + alignment->model->total_shadow_designations); i++){
3230 cell[i] = start_cell[i];
3231 #ifdef DEBUG
3232 g_print("[%d]", cell[i]);
3233 #endif /* DEBUG */
3234 }
3235 #ifdef DEBUG
3236 g_print("\n");
3237 #endif /* DEBUG */
3238 }
3239 score = cell[0];
3240 #ifdef DEBUG
3241 g_message("start with [%d] (%s)", score,
3242 alignment->model->name);
3243 #endif /* DEBUG */
3244 for(i = 0; i < alignment->operation_list->len; i++){
3245 alignment_operation = alignment->operation_list->pdata[i];
3246 for(j = 0; j < alignment_operation->length; j++){
3247 state = alignment_operation->transition->input;
3248 for(k = 0; k < state->src_shadow_list->len; k++){
3249 shadow = state->src_shadow_list->pdata[k];
3250 /* #define DEBUG_SHADOWS */
3251 #ifdef DEBUG_SHADOWS
3252 g_message("start_func on [%s:%s:%s] (%d,%d) shadow(%d)",
3253 state->name,
3254 alignment_operation->transition->name,
3255 shadow->name,
3256 query_pos, target_pos, k);
3257 #endif /* DEBUG_SHADOWS */
3258 cell[1 + shadow->designation] = shadow->start_func(
3259 query_pos, target_pos, user_data);
3260 }
3261 transition = alignment_operation->transition;
3262 for(k = 0; k < transition->dst_shadow_list->len; k++){
3263 shadow = transition->dst_shadow_list->pdata[k];
3264 #ifdef DEBUG_SHADOWS
3265 g_message("end_func on [%s:%s] (%d,%d) shadow(%d)=(%d)",
3266 transition->name,
3267 shadow->name,
3268 query_pos, target_pos, k,
3269 cell[1 + shadow->designation]);
3270 #endif /* DEBUG_SHADOWS */
3271 shadow->end_func(cell[1 + shadow->designation],
3272 query_pos, target_pos, user_data);
3273 }
3274 /**/
3275 t = C4_Calc_score(alignment_operation->transition->calc,
3276 query_pos, target_pos, user_data);
3277 score += t;
3278 #ifdef DEBUG
3279 g_message("add on [%d] (%s) => [%d] at (%d,%d) {%d,%d}",
3280 t, alignment_operation->transition->name,
3281 score, query_pos, target_pos, i, j);
3282 #endif /* DEBUG */
3283 query_pos
3284 += alignment_operation->transition->advance_query;
3285 target_pos
3286 += alignment_operation->transition->advance_target;
3287 }
3288 }
3289 for(i = 0; i < alignment->model->calc_list->len; i++){
3290 calc = alignment->model->calc_list->pdata[i];
3291 if(calc && calc->exit_func)
3292 calc->exit_func(alignment->region, user_data);
3293 }
3294 if(alignment->model->exit_func)
3295 alignment->model->exit_func(alignment->region, user_data);
3296 #ifdef DEBUG
3297 g_message("end with [%d]", score);
3298 #endif /* DEBUG */
3299 #ifdef DEBUG
3300 g_message("CHECKIT: [%d-%d=%d],[%d] [%d-%d=%d],[%d]",
3301 query_pos,
3302 alignment->region->query_start,
3303 (query_pos-alignment->region->query_start),
3304 alignment->region->query_length,
3305 target_pos,
3306 alignment->region->target_start,
3307 (target_pos-alignment->region->target_start),
3308 alignment->region->target_length);
3309 #endif /* DEBUG */
3310 if(score != alignment->score) /* FIXME: temp */
3311 g_warning("Score difference DP[%d] TB[%d]",
3312 alignment->score, score);
3313 g_assert((query_pos-alignment->region->query_start)
3314 == alignment->region->query_length);
3315 g_assert((target_pos-alignment->region->target_start)
3316 == alignment->region->target_length);
3317 g_assert(score == alignment->score);
3318 g_free(cell);
3319 return TRUE;
3320 }
3321 /* FIXME: Should change equality test in case the score is a float
3322 */
3323 #endif /* G_DISABLE_ASSERT */
3324
Alignment_has_valid_path(Alignment * alignment)3325 static gboolean Alignment_has_valid_path(Alignment *alignment){
3326 register C4_State *first_state, *last_state;
3327 register AlignmentOperation *alignment_operation, *prev;
3328 register gint i;
3329 g_assert(alignment);
3330 g_assert(alignment->operation_list);
3331 g_assert(alignment->operation_list->len > 0);
3332 alignment_operation = alignment->operation_list->pdata[0];
3333 first_state = alignment_operation->transition->input;
3334 g_assert(first_state == alignment->model->start_state->state);
3335 alignment_operation = alignment->operation_list->pdata
3336 [alignment->operation_list->len-1];
3337 last_state = alignment_operation->transition->output;
3338 g_assert(last_state == alignment->model->end_state->state);
3339 for(i = 1; i < alignment->operation_list->len; i++){
3340 alignment_operation = alignment->operation_list->pdata[i];
3341 prev = alignment->operation_list->pdata[i-1];
3342 g_assert(alignment_operation->transition->input
3343 == prev->transition->output);
3344 }
3345 return TRUE;
3346 }
3347
Alignment_is_within_scope(Alignment * alignment,Region * seq_region)3348 static gboolean Alignment_is_within_scope(Alignment *alignment,
3349 Region *seq_region){
3350 register gboolean start_at_query_edge,
3351 start_at_target_edge,
3352 end_at_query_edge,
3353 end_at_target_edge;
3354 start_at_query_edge
3355 = (seq_region->query_start == alignment->region->query_start);
3356 start_at_target_edge
3357 = (seq_region->target_start == alignment->region->target_start);
3358 end_at_query_edge = (Region_query_end(alignment->region)
3359 == Region_query_end(seq_region));
3360 end_at_target_edge = (Region_target_end(alignment->region)
3361 == Region_target_end(seq_region));
3362 switch(alignment->model->start_state->scope){
3363 case C4_Scope_ANYWHERE:
3364 break;
3365 case C4_Scope_EDGE:
3366 g_assert(start_at_query_edge || start_at_target_edge);
3367 break;
3368 case C4_Scope_QUERY:
3369 g_assert(start_at_query_edge);
3370 break;
3371 case C4_Scope_TARGET:
3372 g_assert(start_at_target_edge);
3373 break;
3374 case C4_Scope_CORNER:
3375 g_assert(start_at_query_edge && start_at_target_edge);
3376 break;
3377 }
3378 switch(alignment->model->end_state->scope){
3379 case C4_Scope_ANYWHERE:
3380 break;
3381 case C4_Scope_EDGE:
3382 g_assert(end_at_query_edge || end_at_target_edge);
3383 break;
3384 case C4_Scope_QUERY:
3385 g_assert(end_at_query_edge);
3386 break;
3387 case C4_Scope_TARGET:
3388 g_assert(end_at_target_edge);
3389 break;
3390 case C4_Scope_CORNER:
3391 g_assert(end_at_query_edge && end_at_target_edge);
3392 break;
3393 }
3394 return TRUE;
3395 }
3396
Alignment_is_valid(Alignment * alignment,Region * seq_region,gpointer user_data)3397 gboolean Alignment_is_valid(Alignment *alignment, Region *seq_region,
3398 gpointer user_data){
3399 g_assert(alignment);
3400 g_assert(seq_region);
3401 g_assert(Region_is_within(seq_region, alignment->region));
3402 g_assert(Alignment_is_within_scope(alignment, seq_region));
3403 g_assert(Alignment_has_valid_path(alignment));
3404 g_assert(Alignment_has_valid_alignment(alignment, user_data));
3405 return TRUE;
3406 }
3407
3408 /**/
3409
Alignment_import_derived(Alignment * alignment,Alignment * to_add,C4_DerivedModel * derived_model)3410 void Alignment_import_derived(Alignment *alignment,
3411 Alignment *to_add,
3412 C4_DerivedModel *derived_model){
3413 register gint i;
3414 register AlignmentOperation *alignment_operation;
3415 register C4_Transition *transition;
3416 g_assert(alignment->model == derived_model->original);
3417 g_assert(to_add->model == derived_model->derived);
3418 for(i = 0; i < to_add->operation_list->len; i++){
3419 alignment_operation = to_add->operation_list->pdata[i];
3420 transition = derived_model->transition_map
3421 [alignment_operation->transition->id];
3422 Alignment_add(alignment, transition,
3423 alignment_operation->length);
3424 }
3425 return;
3426 }
3427
3428