1 /****************************************************************\
2 *                                                                *
3 *  C4 dynamic programming library - alignment code               *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #include <string.h> /* For strlen() */
17 #include <ctype.h>  /* For tolower() */
18 #include <time.h>   /* For time() */
19 
20 #include "match.h"
21 #include "alignment.h"
22 
Alignment_ArgumentSet_create(Argument * arg)23 Alignment_ArgumentSet *Alignment_ArgumentSet_create(Argument *arg){
24     register ArgumentSet *as;
25     static Alignment_ArgumentSet aas = {80, TRUE};
26     if(arg){
27         as = ArgumentSet_create("Alignment options");
28         ArgumentSet_add_option(as, '\0', "alignmentwidth", NULL,
29             "Alignment display width", "80", Argument_parse_int,
30             &aas.alignment_width);
31         ArgumentSet_add_option(as, '\0', "forwardcoordinates", NULL,
32             "Report all coordinates on the forward strand", "TRUE",
33             Argument_parse_boolean, &aas.forward_strand_coords);
34         Argument_absorb_ArgumentSet(arg, as);
35         }
36     return &aas;
37     }
38 
39 /**/
40 
Alignment_create(C4_Model * model,Region * region,C4_Score score)41 Alignment *Alignment_create(C4_Model *model, Region *region,
42                             C4_Score score){
43     register Alignment *alignment = g_new(Alignment, 1);
44     g_assert(model);
45     g_assert(region);
46     alignment->ref_count = 1;
47     alignment->region = Region_share(region);
48     alignment->score = score;
49     alignment->operation_list = g_ptr_array_new();
50     alignment->model = C4_Model_share(model);
51     return alignment;
52     }
53 
Alignment_share(Alignment * alignment)54 Alignment *Alignment_share(Alignment *alignment){
55     g_assert(alignment);
56     alignment->ref_count++;
57     return alignment;
58     }
59 
Alignment_destroy(Alignment * alignment)60 void Alignment_destroy(Alignment *alignment){
61     register gint i;
62     g_assert(alignment);
63     if(--alignment->ref_count)
64         return;
65     for(i = 0; i < alignment->operation_list->len; i++)
66         g_free(alignment->operation_list->pdata[i]);
67     g_ptr_array_free(alignment->operation_list, TRUE);
68     Region_destroy(alignment->region);
69     C4_Model_destroy(alignment->model);
70     g_free(alignment);
71     return;
72     }
73 
Alignment_add(Alignment * alignment,C4_Transition * transition,gint length)74 void Alignment_add(Alignment *alignment, C4_Transition *transition,
75                    gint length){
76     register AlignmentOperation *alignment_operation, *prev;
77     g_assert(alignment);
78     g_assert(transition);
79     if(alignment->operation_list->len){
80         prev = alignment->operation_list->pdata
81               [alignment->operation_list->len-1];
82         if(prev->transition == transition){
83             prev->length += length;
84             g_assert(prev->length >= 0); /* Cannot have -ve total */
85             if(prev->length == 0){ /* Drop the transition */
86                 g_free(prev);
87                 g_ptr_array_set_size(alignment->operation_list,
88                                      alignment->operation_list->len-1);
89                 }
90             return;
91             }
92         g_assert(prev->transition->output == transition->input);
93         }
94     alignment_operation = g_new(AlignmentOperation, 1);
95     alignment_operation->transition = transition;
96     alignment_operation->length = length;
97     g_ptr_array_add(alignment->operation_list, alignment_operation);
98     return;
99     }
100 
101 /**/
102 
Alignment_match_get_symbol(Sequence * seq,gint pos,gint advance,Translate * translate)103 static gchar Alignment_match_get_symbol(Sequence *seq,
104                                         gint pos, gint advance,
105                                         Translate *translate){
106     register gchar symbol = '\0';
107     switch(advance){
108         case 1:
109             symbol = Sequence_get_symbol(seq, pos);
110             break;
111         case 3:
112             g_assert(translate);
113             symbol = Translate_base(translate,
114                                Sequence_get_symbol(seq, pos),
115                                Sequence_get_symbol(seq, pos+1),
116                                Sequence_get_symbol(seq, pos+2));
117             break;
118         default:
119             g_error("Cannot process match advance of [%d]", advance);
120             break;
121         }
122     return symbol;
123     }
124 
Alignment_match_get_string(Sequence * seq,gint pos,gint advance,gint max,Translate * translate)125 static gchar *Alignment_match_get_string(Sequence *seq,
126                        gint pos, gint advance, gint max,
127                        Translate *translate){
128     register gint ch;
129     switch(max){
130         case 1:
131             g_assert(advance == 1);
132             return g_strdup_printf("%c", Sequence_get_symbol(seq, pos));
133             break;
134         case 3:
135             switch(advance){
136                 case 1:
137                     g_assert(seq->alphabet->type == Alphabet_Type_PROTEIN);
138                     ch = Sequence_get_symbol(seq, pos);
139                     return g_strdup(Alphabet_aa2tla(ch));
140                     break;
141                 case 3:
142                     g_assert(seq->alphabet->type == Alphabet_Type_DNA);
143                     return g_strdup_printf("%c%c%c", /* codon */
144                                     Sequence_get_symbol(seq, pos),
145                                     Sequence_get_symbol(seq, pos+1),
146                                     Sequence_get_symbol(seq, pos+2));
147                     break;
148                 default:
149                     g_error("Cannot handle advance [%d]", advance);
150                     break;
151                 }
152             break;
153         default:
154             g_error("Cannot find match display for max advance of [%d]",
155                     advance);
156             break;
157         }
158     return NULL;
159     }
160 
161 /**/
162 
Alignment_get_gene_orientation(Alignment * alignment)163 static gchar Alignment_get_gene_orientation(Alignment *alignment){
164     register gint i;
165     register AlignmentOperation *ao;
166     for(i = 0; i < alignment->operation_list->len; i++){
167         ao = alignment->operation_list->pdata[i];
168         if(ao->transition->label == C4_Label_5SS)
169             return '+';
170         if(ao->transition->label == C4_Label_3SS)
171             return '-';
172         }
173     return '.';
174     }
175 
Alignment_get_coordinate(Alignment * alignment,Sequence * query,Sequence * target,gboolean on_query,gboolean report_start)176 static gint Alignment_get_coordinate(Alignment *alignment,
177                                      Sequence *query,
178                                      Sequence *target,
179                                      gboolean on_query,
180                                      gboolean report_start){
181     register gint pos;
182     register Alignment_ArgumentSet *aas
183         = Alignment_ArgumentSet_create(NULL);
184     if(on_query){
185         if(report_start){
186             pos = alignment->region->query_start;
187         } else {
188             pos = Region_query_end(alignment->region);
189             }
190         if(aas->forward_strand_coords
191         && (query->strand == Sequence_Strand_REVCOMP)){
192             pos = query->len - pos;
193             }
194     } else { /* on_target */
195         if(report_start){
196             pos = alignment->region->target_start;
197         } else {
198             pos = Region_target_end(alignment->region);
199             }
200         if(aas->forward_strand_coords
201         && (target->strand == Sequence_Strand_REVCOMP)){
202             pos = target->len - pos;
203             }
204         }
205     return pos;
206     }
207 
Alignment_convert_coordinate(Alignment * alignment,Sequence * query,Sequence * target,gint query_pos,gint target_pos,gboolean on_query)208 static gint Alignment_convert_coordinate(Alignment *alignment,
209                                          Sequence *query,
210                                          Sequence *target,
211                                          gint query_pos,
212                                          gint target_pos,
213                                          gboolean on_query){
214     register gint pos;
215     register Alignment_ArgumentSet *aas
216         = Alignment_ArgumentSet_create(NULL);
217     if(on_query){
218         pos = query_pos;
219         if(aas->forward_strand_coords
220         && (query->strand == Sequence_Strand_REVCOMP)){
221             pos = query->len - pos;
222             }
223     } else { /* on_target */
224         pos = target_pos;
225         if(aas->forward_strand_coords
226         && (target->strand == Sequence_Strand_REVCOMP)){
227             pos = target->len - pos;
228             }
229         }
230     return pos;
231     }
232 
Alignment_get_max_pos_len(Alignment * alignment,Sequence * query,Sequence * target)233 static gint Alignment_get_max_pos_len(Alignment *alignment,
234                           Sequence *query, Sequence *target){
235     register gint qmax = MAX(
236       Alignment_get_coordinate(alignment, query, target, TRUE, TRUE),
237       Alignment_get_coordinate(alignment, query, target, TRUE, FALSE));
238     register gint tmax = MAX(
239       Alignment_get_coordinate(alignment, query, target, FALSE, TRUE),
240       Alignment_get_coordinate(alignment, query, target, FALSE, FALSE));
241     register gint max = MAX(qmax, tmax);
242     register gchar *tmpstr = g_strdup_printf("%d", max);
243     register gint maxlen = strlen(tmpstr);
244     g_free(tmpstr);
245     return maxlen;
246     }
247 
248 /**/
249 
250 typedef struct {
251     gint query_pos;
252     gint target_pos;
253 } AlignmentPosition;
254 
255 typedef struct {
256     gint query_separation;
257     gint target_separation;
258 } AlignmentSeparation;
259 
260 typedef struct {
261       GString *outer_query;
262       GString *inner_query;
263       GString *middle;
264       GString *inner_target;
265       GString *outer_target;
266     GPtrArray *row_marker; /* List containing AlignmentPosition */
267          gint  max_pos_len;
268          gint  width;
269          gint  limit;
270          gint  query_intron_count;
271          gint  target_intron_count;
272          gint  joint_intron_count;
273          gint  intron_advance_query;
274          gint  intron_advance_target;
275         gchar  gene_orientation;
276          gint  ner_count;
277          gint  ner_advance_query;
278          gint  ner_advance_target;
279          gint  curr_split_codon_count; /* Add 2 for each split codon */
280     GPtrArray *split_codon_separation_list;
281                /* List containing AlignmentSeparation */
282 } AlignmentView;
283 
AlignmentView_create(Alignment * alignment,Sequence * query,Sequence * target)284 static AlignmentView *AlignmentView_create(Alignment *alignment,
285                                            Sequence *query,
286                                            Sequence *target){
287     register AlignmentView *av = g_new(AlignmentView, 1);
288     register AlignmentOperation *ao;
289     register AlignmentSeparation *curr_split_codon_separation = NULL;
290     register gint i;
291     register Alignment_ArgumentSet *aas
292            = Alignment_ArgumentSet_create(NULL);
293     av->outer_query  = g_string_sized_new(16);
294     if(alignment->model->max_query_advance == 3)
295         av->inner_query = g_string_sized_new(16);
296     else
297         av->inner_query = NULL;
298     av->middle = g_string_sized_new(16);
299     if(alignment->model->max_target_advance == 3)
300         av->inner_target = g_string_sized_new(16);
301     else
302         av->inner_target = NULL;
303     av->outer_target  = g_string_sized_new(16);
304     av->row_marker = g_ptr_array_new();
305     av->max_pos_len = Alignment_get_max_pos_len(alignment,
306                                                 query, target);
307     av->width = aas->alignment_width-((av->max_pos_len + 5) << 1);
308     g_assert(av->width > 0);
309     av->limit = av->width;
310     av->query_intron_count = 0;
311     av->target_intron_count = 0;
312     av->joint_intron_count = 0;
313     av->intron_advance_query = 0;
314     av->intron_advance_target = 0;
315     av->gene_orientation = Alignment_get_gene_orientation(alignment);
316     av->ner_count = 0;
317     av->ner_advance_query = 0;
318     av->ner_advance_target = 0;
319     av->curr_split_codon_count = 0;
320     av->split_codon_separation_list = g_ptr_array_new();
321     for(i = 0; i < alignment->operation_list->len; i++){
322         ao = alignment->operation_list->pdata[i];
323         if(curr_split_codon_separation){
324             if(ao->transition->label == C4_Label_SPLIT_CODON){
325                 g_ptr_array_add(av->split_codon_separation_list,
326                                 curr_split_codon_separation);
327                 curr_split_codon_separation = NULL;
328             } else {
329                 curr_split_codon_separation->query_separation
330                     += (ao->length * ao->transition->advance_query);
331                 curr_split_codon_separation->target_separation
332                     += (ao->length * ao->transition->advance_target);
333                 }
334         } else {
335             if(ao->transition->label == C4_Label_SPLIT_CODON){
336                 curr_split_codon_separation
337                         = g_new(AlignmentSeparation, 1);
338                 curr_split_codon_separation->query_separation
339                     = (ao->length * ao->transition->advance_query);
340                 curr_split_codon_separation->target_separation
341                     = (ao->length * ao->transition->advance_target);
342                 }
343             }
344         }
345     /* Check we have an even number of split codons */
346     g_assert(!curr_split_codon_separation);
347     return av;
348     }
349 
AlignmentView_destroy(AlignmentView * av)350 static void AlignmentView_destroy(AlignmentView *av){
351     register gint i;
352     for(i = 0; i < av->split_codon_separation_list->len; i++)
353         g_free(av->split_codon_separation_list->pdata[i]);
354     g_ptr_array_free(av->split_codon_separation_list, TRUE);
355     g_string_free(av->outer_query,  TRUE);
356     g_string_free(av->middle, TRUE);
357     g_string_free(av->outer_target,  TRUE);
358 
359     if(av->inner_query)
360         g_string_free(av->inner_query, TRUE);
361     if(av->inner_target)
362         g_string_free(av->inner_target, TRUE);
363 
364     for(i = 0; i < av->row_marker->len; i++)
365         g_free(av->row_marker->pdata[i]);
366     g_ptr_array_free(av->row_marker, TRUE);
367     g_free(av);
368     return;
369     }
370 
AlignmentView_add(AlignmentView * av,gchar * query_string,gchar * inner_query_string,gchar * match_string,gchar * inner_target_string,gchar * target_string,gint query_pos,gint target_pos)371 static void AlignmentView_add(AlignmentView *av,
372                               gchar *query_string,
373                               gchar *inner_query_string,
374                               gchar *match_string,
375                               gchar *inner_target_string,
376                               gchar *target_string,
377                               gint query_pos, gint target_pos){
378     register AlignmentPosition *apos;
379     register gint i;
380     g_assert(strlen(query_string) == strlen(match_string));
381     g_assert(strlen(match_string) == strlen(target_string));
382     if(av->inner_query){
383         if(inner_query_string){
384             g_assert(strlen(match_string)
385                   == strlen(inner_query_string));
386             g_string_append(av->inner_query, inner_query_string);
387         } else {
388             for(i = strlen(match_string)-1; i >=0; i--)
389                 g_string_append_c(av->inner_query, ' ');
390             }
391         }
392     if(av->inner_target){
393         if(inner_target_string){
394             g_assert(strlen(match_string)
395                   == strlen(inner_target_string));
396             g_string_append(av->inner_target, inner_target_string);
397         } else {
398             for(i = strlen(match_string)-1; i >=0; i--)
399                 g_string_append_c(av->inner_target, ' ');
400             }
401         }
402     g_string_append(av->outer_query,  query_string);
403     g_string_append(av->middle, match_string);
404     g_string_append(av->outer_target,  target_string);
405     if(av->outer_query->len >= av->limit){
406         apos = g_new(AlignmentPosition, 1);
407         apos->query_pos  = query_pos;
408         apos->target_pos = target_pos;
409         g_ptr_array_add(av->row_marker, apos);
410         av->limit += av->width;
411         }
412     return;
413     }
414 
415 typedef struct {
416     gchar *match_string;
417     gchar *codon;
418 } AlignmentMatchData;
419 
Alignment_match_translate_reverse(gchar * dna,gint length,gpointer user_data)420 static void Alignment_match_translate_reverse(gchar *dna, gint length,
421                                               gpointer user_data){
422     register AlignmentMatchData *amd = user_data;
423     register gint i;
424     for(i = 0; i < 3; i++)
425         if(dna[i] == amd->codon[i])
426             amd->match_string[i] = '!';
427     return;
428     }
429 
Alignment_get_equiv_symbol(gchar symbol_a,gchar symbol_b,Submat * submat)430 static gchar Alignment_get_equiv_symbol(gchar symbol_a, gchar symbol_b,
431                                         Submat *submat){
432     register gint score;
433     g_assert(symbol_a);
434     g_assert(symbol_b);
435     if(submat){
436         score = Submat_lookup(submat, symbol_a, symbol_b);
437         if(score == 0)
438             return '.';
439         if(score > 0){
440             if(toupper(symbol_a) == toupper(symbol_b))
441                 return '|';
442             else
443                 return ':';
444             }
445     } else {
446         if(symbol_a == symbol_b)
447             return '|';
448         }
449     return ' ';
450     }
451 
Alignment_get_codon_match_string(gchar * codon,gchar aa,Submat * protein_submat,Translate * translate)452 static gchar *Alignment_get_codon_match_string(gchar *codon, gchar aa,
453                         Submat *protein_submat, Translate *translate){
454     register gchar codon_aa = Translate_codon(translate, codon);
455     register gchar match_symbol;
456     register gchar *codon_match;
457     gchar aa_seq[2];
458     AlignmentMatchData amd;
459     g_assert(translate);
460     g_assert(protein_submat);
461     match_symbol = Alignment_get_equiv_symbol(codon_aa, aa,
462                                               protein_submat);
463     codon_match = g_strnfill(3, match_symbol);
464     if(match_symbol != '|'){
465         amd.match_string = codon_match;
466         amd.codon = codon;
467         aa_seq[0] = aa;
468         aa_seq[1] = '\0';
469         Translate_reverse(translate, aa_seq, 1,
470                           Alignment_match_translate_reverse, &amd);
471         }
472     return codon_match;
473     }
474 
AlignmentView_add_MATCH(AlignmentView * av,C4_Transition * transition,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,Submat * dna_submat,Submat * protein_submat,Translate * translate)475 static void AlignmentView_add_MATCH(AlignmentView *av,
476                 C4_Transition *transition,
477                 gint total_length, Sequence *query, Sequence *target,
478                 gint query_pos, gint target_pos,
479                 Submat *dna_submat, Submat *protein_submat,
480                 Translate *translate){
481     register gchar query_symbol, target_symbol;
482     register gchar *query_string, *target_string;
483     register gchar *inner_query_string = NULL,
484                    *inner_target_string = NULL;
485     register gint max_advance;
486     register gint i;
487     register gint curr_query_pos = query_pos,
488                   curr_target_pos = target_pos;
489     register Match *match = (Match*)transition->label_data;
490     gchar match_string[4]; /* FIXME: temp: should be max_advance long */
491     for(i = 0; i < total_length; i++){
492         max_advance = MAX(transition->advance_query,
493                           transition->advance_target);
494         query_string = Alignment_match_get_string(query,
495                            curr_query_pos, transition->advance_query,
496                            max_advance, translate);
497         target_string = Alignment_match_get_string(target,
498                            curr_target_pos, transition->advance_target,
499                            max_advance, translate);
500         query_symbol = Alignment_match_get_symbol(query,
501                            curr_query_pos, transition->advance_query,
502                            translate);
503         target_symbol = Alignment_match_get_symbol(target,
504                            curr_target_pos, transition->advance_target,
505                            translate);
506         if(transition->advance_query == 3)
507             inner_query_string = Alphabet_aa2tla(query_symbol);
508         if(transition->advance_target == 3)
509             inner_target_string = Alphabet_aa2tla(target_symbol);
510         if(match){
511             match->display_func(match, query, target,
512                                 curr_query_pos, curr_target_pos,
513                                 match_string);
514         } else { /* In the absense of Match */
515             g_assert(transition->advance_query == 1);
516             g_assert(transition->advance_target == 1);
517             match_string[0] = (Sequence_get_symbol(query, query_pos)
518                             == Sequence_get_symbol(target, target_pos))
519                             ?'|':' ';
520             match_string[1] = '\0';
521             }
522         AlignmentView_add(av, query_string, inner_query_string,
523                               match_string,
524                               inner_target_string, target_string,
525                               curr_query_pos, curr_target_pos);
526         g_free(query_string);
527         g_free(target_string);
528         curr_query_pos += transition->advance_query;
529         curr_target_pos += transition->advance_target;
530         }
531     return;
532     }
533 
AlignmentView_add_GAP(AlignmentView * av,gint advance_query,gint advance_target,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,Translate * translate)534 static void AlignmentView_add_GAP(AlignmentView *av,
535                 gint advance_query, gint advance_target,
536                 gint total_length, Sequence *query, Sequence *target,
537                 gint query_pos, gint target_pos, Translate *translate){
538     register gint i, j;
539     register gint curr_query_pos = query_pos,
540                   curr_target_pos = target_pos;
541     register gchar *seq_string = g_new(gchar, 4),
542                    *match_string = g_new(gchar, 4),
543                    *gap_string = g_new(gchar, 4);
544     register Alphabet_Type emitted_alphabet_type;
545     register gchar tr_codon, *tr_codon_name;
546     register gboolean is_translating
547         = ( ( (query->alphabet->type == Alphabet_Type_PROTEIN)
548            && (target->alphabet->type == Alphabet_Type_DNA))
549           ||( (query->alphabet->type == Alphabet_Type_DNA)
550            && (target->alphabet->type == Alphabet_Type_PROTEIN))
551           || ((advance_query|advance_target) == 3));
552     g_assert(!(advance_query && advance_target));
553     match_string[0] = ' ';
554     gap_string[0] = '-';
555     if(advance_query)
556         emitted_alphabet_type = query->alphabet->type;
557     else /* advance_target */
558         emitted_alphabet_type = target->alphabet->type;
559     for(i = 0; i < total_length; i++){
560         for(j = 0; j < (advance_query|advance_target); j++){
561             if(advance_query)
562                 seq_string[j] = Sequence_get_symbol(query, curr_query_pos+j);
563             else /* advance_target */
564                 seq_string[j] = Sequence_get_symbol(target, curr_target_pos+j);
565             match_string[j] = match_string[0];
566             gap_string[j] = gap_string[0];
567             }
568         seq_string[j] = match_string[j] = gap_string[j] = '\0';
569         tr_codon_name = NULL;
570         if(is_translating){
571             if(emitted_alphabet_type == Alphabet_Type_PROTEIN){
572                 strncpy(seq_string, Alphabet_aa2tla(seq_string[0]), 3);
573                 match_string[2] = match_string[1] = match_string[0];
574                 gap_string[2] = gap_string[1] = gap_string[0];
575                 seq_string[3] = match_string[3] = gap_string[3] = '\0';
576                 }
577             if((advance_query|advance_target) == 3){
578                 gap_string[0] = '<';
579                 gap_string[1] = '-';
580                 gap_string[2] = '>';
581                 gap_string[3] = '\0';
582                 tr_codon = Translate_codon(translate, seq_string);
583                 tr_codon_name = Alphabet_aa2tla(tr_codon);
584                 }
585             }
586         if(advance_query)
587             AlignmentView_add(av, seq_string,
588                                   tr_codon_name,
589                                   match_string,
590                                   is_translating?gap_string:NULL,
591                                   gap_string,
592                                   curr_query_pos, curr_target_pos);
593         else
594             AlignmentView_add(av, gap_string,
595                                   is_translating?gap_string:NULL,
596                                   match_string,
597                                   tr_codon_name,
598                                   seq_string,
599                                   curr_query_pos, curr_target_pos);
600         curr_query_pos += advance_query;
601         curr_target_pos += advance_target;
602         }
603     g_free(seq_string);
604     g_free(match_string);
605     g_free(gap_string);
606     return;
607     }
608 /* Display gap:[- N] codon[<->   NNN] frameshift [-*N]
609  */
610 
AlignmentView_set_consensus_ss_string(AlignmentView * av,gboolean is_5_prime,gchar * splice_site,gchar * consensus_string)611 static void AlignmentView_set_consensus_ss_string(AlignmentView *av,
612     gboolean is_5_prime, gchar *splice_site, gchar *consensus_string){
613     register gchar cons_a, cons_b;
614     if(av->gene_orientation == '+'){
615         if(is_5_prime){ /* FWD: gt..ag */
616             cons_a = 'G';
617             cons_b = 'T';
618         } else {
619             cons_a = 'A';
620             cons_b = 'G';
621             }
622     } else {
623         g_assert(av->gene_orientation == '-');
624         if(is_5_prime){ /* REV: ct..ac */
625             cons_a = 'A';
626             cons_b = 'C';
627         } else {
628             cons_a = 'C';
629             cons_b = 'T';
630             }
631         }
632     if(toupper(splice_site[0]) == cons_a)
633         consensus_string[0] = '+';
634     else
635         consensus_string[0] = '-';
636     if(toupper(splice_site[1]) == cons_b)
637         consensus_string[1] = '+';
638     else
639         consensus_string[1] = '-';
640     return;
641     }
642 
AlignmentView_add_SPLICE_SITE(AlignmentView * av,gint advance_query,gint advance_target,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,gboolean is_5_prime,C4_Transition * last_match)643 static void AlignmentView_add_SPLICE_SITE(AlignmentView *av,
644                 gint advance_query, gint advance_target,
645                 gint total_length, Sequence *query, Sequence *target,
646                 gint query_pos, gint target_pos, gboolean is_5_prime,
647                 C4_Transition *last_match){
648     register gchar *gap_string = "  ";
649     static gchar qy_seq_string[3], tg_seq_string[3],
650                  qy_cons_string[3], tg_cons_string[3];
651     qy_cons_string[0] = qy_cons_string[1] = ' ';
652     tg_cons_string[0] = tg_cons_string[1] = ' ';
653     qy_cons_string[2] = tg_cons_string[2] = '\0';
654     qy_seq_string[2] = tg_seq_string[2] = '\0';
655     if(advance_query == 2){
656         qy_seq_string[0] = Sequence_get_symbol(query, query_pos);
657         qy_seq_string[1] = Sequence_get_symbol(query, query_pos+1);
658         AlignmentView_set_consensus_ss_string(av, is_5_prime,
659                                 qy_seq_string, qy_cons_string);
660         g_strdown(qy_seq_string);
661         }
662     if(advance_target == 2){
663         tg_seq_string[0] = Sequence_get_symbol(target, target_pos);
664         tg_seq_string[1] = Sequence_get_symbol(target, target_pos+1);
665         AlignmentView_set_consensus_ss_string(av, is_5_prime,
666                                 tg_seq_string, tg_cons_string);
667         g_strdown(tg_seq_string);
668         }
669     if(advance_query == 2){
670         if(advance_target == 2){ /* Joint intron */
671             AlignmentView_add(av, qy_seq_string,
672                           qy_cons_string, gap_string, tg_cons_string,
673                           tg_seq_string,
674                           query_pos, target_pos);
675         } else { /* Query intron */
676             g_assert(advance_target == 0);
677             g_strdown(qy_seq_string);
678             g_assert(last_match);
679             if(last_match->advance_query == 3)
680                 AlignmentView_add(av, qy_seq_string, qy_cons_string, gap_string,
681                                   gap_string, gap_string, query_pos, target_pos);
682             else
683                 AlignmentView_add(av, qy_seq_string,
684                                   NULL, qy_cons_string, NULL,
685                                   gap_string, query_pos, target_pos);
686             }
687     } else { /* Target intron */
688         g_assert(advance_query == 0);
689         g_assert(advance_target == 2);
690         g_strdown(tg_seq_string);
691         g_assert(last_match);
692         if(last_match->advance_target == 3)
693             AlignmentView_add(av, gap_string, gap_string,
694                               gap_string, tg_cons_string, tg_seq_string,
695                               query_pos, target_pos);
696         else
697             AlignmentView_add(av, gap_string,
698                               NULL, tg_cons_string, NULL,
699                               tg_seq_string, query_pos, target_pos);
700         }
701     return;
702     }
703 
AlignmentView_add_INTRON(AlignmentView * av,gint advance_query,gint advance_target,Sequence * query,Sequence * target,gint query_pos,gint target_pos,C4_Transition * last_match)704 static void AlignmentView_add_INTRON(AlignmentView *av,
705                 gint advance_query, gint advance_target,
706                 Sequence *query, Sequence *target,
707                 gint query_pos, gint target_pos,
708                 C4_Transition *last_match){
709     register gchar *dir_sign = "????", *label = NULL;
710     register gint fill, intron_count = 0;
711     register gchar *name_string, *middle_string, *gap_string,
712                    *pad_string, *intron_name;
713     g_assert(last_match);
714     if(av->gene_orientation == '+'){
715         dir_sign = ">>>>";
716     } else if(av->gene_orientation == '-'){
717         dir_sign = "<<<<";
718         }
719     if(advance_query){
720         if(advance_target){
721             intron_count = ++av->joint_intron_count;
722             intron_name = "Joint";
723             label = g_strdup_printf("%d bp // %d bp",
724                         advance_query+4, advance_target+4);
725         } else {
726             intron_count = ++av->query_intron_count;
727             intron_name = "Query";
728             label = g_strdup_printf("%d bp", advance_query+4);
729             }
730     } else {
731         intron_count = ++av->target_intron_count;
732         intron_name = "Target";
733         label = g_strdup_printf("%d bp", advance_target+4);
734         }
735     name_string = g_strdup_printf("%s %s Intron %d %s",
736                       dir_sign, intron_name, intron_count, dir_sign);
737     g_assert(strlen(name_string) > strlen(label));
738     fill = (strlen(name_string)-strlen(label))+1;
739     middle_string = g_strdup_printf("%*c%s%*c",
740             ((fill|1)>>1), ' ', label, ((fill-1)>>1), ' ');
741     gap_string = g_strnfill(strlen(name_string), '.');
742     pad_string = g_strnfill(strlen(name_string), '^');
743     g_assert(strlen(name_string) == strlen(middle_string));
744     g_assert(strlen(middle_string) == strlen(gap_string));
745     if(advance_query){
746         if(advance_target){ /* joint intron */
747             AlignmentView_add(av, name_string, NULL, middle_string, NULL,
748                               name_string, query_pos, target_pos);
749         } else { /* query intron */
750             if(last_match->advance_query == 3)
751                 AlignmentView_add(av, gap_string, pad_string, middle_string,
752                                   pad_string, name_string, query_pos, target_pos);
753             else
754                 AlignmentView_add(av, gap_string, NULL, middle_string, NULL,
755                                   name_string, query_pos, target_pos);
756             }
757     } else { /* target intron */
758         if(last_match->advance_target == 3)
759             AlignmentView_add(av, name_string, pad_string, middle_string,
760                               pad_string, gap_string, query_pos, target_pos);
761         else
762             AlignmentView_add(av, name_string, NULL, middle_string, NULL,
763                               gap_string, query_pos, target_pos);
764         }
765     g_free(label);
766     g_free(name_string);
767     g_free(gap_string);
768     g_free(pad_string);
769     g_free(middle_string);
770     return;
771     }
772 
AlignmentView_add_NER(AlignmentView * av,gint advance_query,gint advance_target,gint query_pos,gint target_pos)773 static void AlignmentView_add_NER(AlignmentView *av,
774                           gint advance_query, gint advance_target,
775                           gint query_pos, gint target_pos){
776     register gchar
777         *upper_string  = g_strdup_printf("%d", advance_query),
778         *middle_string = g_strdup_printf("NER %d", ++av->ner_count),
779         *lower_string  = g_strdup_printf("%d", advance_target);
780     register gint upper_len = strlen(upper_string),
781                   middle_len = strlen(middle_string),
782                   lower_len = strlen(lower_string),
783                   max_len;
784     register gchar *upper_padded, *middle_padded, *lower_padded;
785     max_len = upper_len;
786     if(max_len < middle_len)
787         max_len = middle_len;
788     if(max_len < lower_len)
789         max_len = lower_len;
790     upper_padded = g_strdup_printf("--<%*c%s%*c>--",
791             1+(((max_len-upper_len)+1)>>1), ' ',
792             upper_string,
793             1+((max_len-upper_len)>>1), ' ');
794     middle_padded = g_strdup_printf("--<%*c%s%*c>--",
795             1+(((max_len-middle_len)+1)>>1), ' ',
796             middle_string,
797             1+((max_len-middle_len)>>1), ' ');
798     lower_padded = g_strdup_printf("--<%*c%s%*c>--",
799             1+(((max_len-lower_len)+1)>>1), ' ',
800             lower_string,
801             1+((max_len-lower_len)>>1), ' ');
802     g_free(upper_string);
803     g_free(middle_string);
804     g_free(lower_string);
805     AlignmentView_add(av, upper_padded, NULL, middle_padded, NULL,
806                       lower_padded, query_pos, target_pos);
807     g_free(upper_padded);
808     g_free(middle_padded);
809     g_free(lower_padded);
810     return;
811     }
812 
813 #define AlignmentView_combine_advance(advance_query, advance_target) \
814         (((advance_query) << 8) | (advance_target))
815 
AlignmentView_add_SPLIT_CODON(AlignmentView * av,Sequence * query,Sequence * target,gint advance_query,gint advance_target,gint query_pos,gint target_pos,Submat * protein_submat,Translate * translate)816 static void AlignmentView_add_SPLIT_CODON(AlignmentView *av,
817             Sequence *query, Sequence *target,
818             gint advance_query, gint advance_target,
819             gint query_pos, gint target_pos,
820             Submat *protein_submat, Translate *translate){
821     register gchar *query_string = NULL, *target_string = NULL,
822                    *match_string = NULL, *codon_match;
823     register gchar qy_aa_symbol = '\0', tg_aa_symbol = '\0',
824                   *qy_aa_name = NULL, *tg_aa_name = NULL,
825                    *tr_codon_name;
826     register gchar *inner_query_string = NULL,
827                    *inner_target_string = NULL;
828     register gint pos_to_start = -1, num_to_print;
829     register gint qp0 = 0, qp1 = 0, qp2 = 0,
830                   tp0 = 0, tp1 = 0, tp2 = 0;
831     register gboolean query_is_dna, target_is_dna, before_intron;
832     gchar qy_codon[4], tg_codon[4];
833     register AlignmentSeparation *as
834         = av->split_codon_separation_list->pdata
835          [av->curr_split_codon_count >> 1];
836     g_assert(advance_query || advance_target);
837     query_is_dna = (query->alphabet->type == Alphabet_Type_DNA);
838     target_is_dna = (target->alphabet->type == Alphabet_Type_DNA);
839     before_intron = (av->curr_split_codon_count & 1)?FALSE:TRUE;
840     qy_codon[0] = qy_codon[1] = qy_codon[2] = qy_codon[3] = '\0';
841     tg_codon[0] = tg_codon[1] = tg_codon[2] = tg_codon[3] = '\0';
842     /**/
843     if(query_is_dna && target_is_dna){
844         switch(AlignmentView_combine_advance(
845                     advance_query, advance_target)){
846             case AlignmentView_combine_advance(1, 1):
847                 if(before_intron){
848                     pos_to_start = 0;
849                     qp0 = query_pos;
850                     qp1 = query_pos + as->query_separation;
851                     qp2 = query_pos + as->query_separation + 1;
852                     tp0 = target_pos;
853                     tp1 = target_pos + as->target_separation;
854                     tp2 = target_pos + as->target_separation + 1;
855                 } else { /* after_intron */
856                     pos_to_start = 2;
857                     qp0 = query_pos - as->query_separation;
858                     qp1 = query_pos - as->query_separation + 1;
859                     qp2 = query_pos;
860                     tp0 = target_pos - as->target_separation;
861                     tp1 = target_pos - as->target_separation + 1;
862                     tp2 = target_pos;
863                     }
864                 break;
865             case AlignmentView_combine_advance(2, 2):
866                 if(before_intron){
867                     pos_to_start = 0;
868                     qp0 = query_pos;
869                     qp1 = query_pos + 1;
870                     qp2 = query_pos + as->query_separation;
871                     tp0 = target_pos;
872                     tp1 = target_pos + 1;
873                     tp2 = target_pos + as->target_separation;
874                 } else { /* after_intron */
875                     pos_to_start = 1;
876                     qp0 = query_pos - as->query_separation;
877                     qp1 = query_pos;
878                     qp2 = query_pos + 1;
879                     tp0 = target_pos - as->target_separation;
880                     tp1 = target_pos;
881                     tp2 = target_pos + 1;
882                     }
883                 break;
884             default:
885                 g_error("Unexpected d2d split codon [%d,%d]",
886                         advance_query, advance_target);
887                 break;
888             }
889         qy_codon[0] = Sequence_get_symbol(query, qp0);
890         qy_codon[1] = Sequence_get_symbol(query, qp1);
891         qy_codon[2] = Sequence_get_symbol(query, qp2);
892         tg_codon[0] = Sequence_get_symbol(target, tp0);
893         tg_codon[1] = Sequence_get_symbol(target, tp1);
894         tg_codon[2] = Sequence_get_symbol(target, tp2);
895     } else {
896         if(query_is_dna){
897             g_assert(target->alphabet->type == Alphabet_Type_PROTEIN);
898             tg_aa_symbol = Sequence_get_symbol(target, target_pos);
899             switch(AlignmentView_combine_advance(
900                         advance_query, advance_target)){
901                 case AlignmentView_combine_advance(1, 0):
902                     pos_to_start = 0;
903                     qp0 = query_pos;
904                     qp1 = query_pos + as->query_separation;
905                     qp2 = query_pos + as->query_separation + 1;
906                     break;
907                 case AlignmentView_combine_advance(2, 0):
908                     pos_to_start = 0;
909                     qp0 = query_pos;
910                     qp1 = query_pos + 1;
911                     qp2 = query_pos + as->query_separation;
912                     break;
913                 case AlignmentView_combine_advance(2, 1):
914                     pos_to_start = 1;
915                     qp0 = query_pos - as->query_separation;
916                     qp1 = query_pos;
917                     qp2 = query_pos + 1;
918                     break;
919                 case AlignmentView_combine_advance(1, 1):
920                     pos_to_start = 2;
921                     qp0 = query_pos - as->query_separation;
922                     qp1 = query_pos - as->query_separation + 1;
923                     qp2 = query_pos;
924                     break;
925                 default:
926                     g_error("Unexpected d2p split codon [%d,%d]",
927                             advance_query, advance_target);
928                     break;
929                 }
930             qy_codon[0] = Sequence_get_symbol(query, qp0);
931             qy_codon[1] = Sequence_get_symbol(query, qp1);
932             qy_codon[2] = Sequence_get_symbol(query, qp2);
933         } else {
934             g_assert(query->alphabet->type == Alphabet_Type_PROTEIN);
935             g_assert(target->alphabet->type == Alphabet_Type_DNA);
936             qy_aa_symbol = Sequence_get_symbol(query, query_pos);
937             switch(AlignmentView_combine_advance(
938                         advance_query, advance_target)){
939                 case AlignmentView_combine_advance(0, 1):
940                     pos_to_start = 0;
941                     tp0 = target_pos;
942                     tp1 = target_pos + as->target_separation;
943                     tp2 = target_pos + as->target_separation + 1;
944                     break;
945                 case AlignmentView_combine_advance(0, 2):
946                     pos_to_start = 0;
947                     tp0 = target_pos;
948                     tp1 = target_pos + 1;
949                     tp2 = target_pos + as->target_separation;
950                     break;
951                 case AlignmentView_combine_advance(1, 2):
952                     pos_to_start = 1;
953                     tp0 = target_pos - as->target_separation;
954                     tp1 = target_pos;
955                     tp2 = target_pos + 1;
956                     break;
957                 case AlignmentView_combine_advance(1, 1):
958                     pos_to_start = 2;
959                     tp0 = target_pos - as->target_separation;
960                     tp1 = target_pos - as->target_separation + 1;
961                     tp2 = target_pos;
962                     break;
963                 default:
964                     g_error("Unexpected p2d split codon [%d,%d]",
965                             advance_query, advance_target);
966                     break;
967                 }
968             tg_codon[0] = Sequence_get_symbol(target, tp0);
969             tg_codon[1] = Sequence_get_symbol(target, tp1);
970             tg_codon[2] = Sequence_get_symbol(target, tp2);
971             }
972         }
973     g_assert(pos_to_start != -1);
974     av->curr_split_codon_count++;
975 /**/
976     if(!query_is_dna)
977         qy_aa_name = Alphabet_aa2tla(qy_aa_symbol);
978     if(!target_is_dna)
979         tg_aa_name = Alphabet_aa2tla(tg_aa_symbol);
980     num_to_print = MAX(advance_query, advance_target);
981     query_string = g_strdup_printf("{%.*s}",
982                     num_to_print,
983                     (query_is_dna?qy_codon:qy_aa_name)+pos_to_start);
984     target_string = g_strdup_printf("{%.*s}",
985                      num_to_print,
986                      (target_is_dna?tg_codon:tg_aa_name)+pos_to_start);
987     g_strup(qy_codon);
988     g_strup(tg_codon);
989     if(query_is_dna){
990         g_assert(!qy_aa_symbol);
991         qy_aa_symbol = Translate_codon(translate, qy_codon);
992         tr_codon_name = Alphabet_aa2tla(qy_aa_symbol);
993         inner_query_string = g_strdup_printf("{%.*s}",
994                   num_to_print, tr_codon_name+pos_to_start);
995         }
996     if(target_is_dna){
997         g_assert(!tg_aa_symbol);
998         tg_aa_symbol = Translate_codon(translate, tg_codon);
999         tr_codon_name = Alphabet_aa2tla(tg_aa_symbol);
1000         inner_target_string = g_strdup_printf("{%.*s}",
1001                       num_to_print, tr_codon_name+pos_to_start);
1002         }
1003     /**/
1004     if(query_is_dna){
1005         if(target_is_dna){ /* d,d */
1006             codon_match = g_strnfill(3, Alignment_get_equiv_symbol(
1007                             qy_aa_symbol, tg_aa_symbol,
1008                             protein_submat));
1009         } else { /* d,p */
1010             codon_match = Alignment_get_codon_match_string(
1011                             qy_codon, tg_aa_symbol,
1012                             protein_submat, translate);
1013             }
1014     } else { /* p,d */
1015         codon_match = Alignment_get_codon_match_string(
1016                         tg_codon, qy_aa_symbol,
1017                         protein_submat, translate);
1018         }
1019     match_string = g_strdup_printf("{%.*s}",
1020                          num_to_print, codon_match+pos_to_start);
1021     g_free(codon_match);
1022     /**/
1023     g_assert(query_string);
1024     g_assert(match_string);
1025     g_assert(target_string);
1026     AlignmentView_add(av, query_string, inner_query_string,
1027                       match_string, inner_target_string,
1028                       target_string, query_pos, target_pos);
1029     g_free(query_string);
1030     g_free(match_string);
1031     g_free(target_string);
1032     if(inner_query_string)
1033         g_free(inner_query_string);
1034     if(inner_target_string)
1035         g_free(inner_target_string);
1036     return;
1037     }
1038 
AlignmentView_add_FRAMESHIFT(AlignmentView * av,gint advance_query,gint advance_target,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,Translate * translate)1039 static void AlignmentView_add_FRAMESHIFT(AlignmentView *av,
1040                 gint advance_query, gint advance_target,
1041                 gint total_length, Sequence *query, Sequence *target,
1042                 gint query_pos, gint target_pos, Translate *translate){
1043     register gint i, j;
1044     register gint curr_query_pos = query_pos,
1045                   curr_target_pos = target_pos;
1046     static gchar seq_string[4], match_string[4], gap_string[4];
1047     register Alphabet_Type emitted_alphabet_type;
1048     g_assert(!(advance_query && advance_target));
1049     match_string[0] = '#'; /* Frameshift */
1050     gap_string[0] = '-';
1051     if(advance_query)
1052         emitted_alphabet_type = query->alphabet->type;
1053     else /* advance_target */
1054         emitted_alphabet_type = target->alphabet->type;
1055     for(i = 0; i < total_length; i++){
1056         for(j = 0; j < (advance_query|advance_target); j++){
1057             if(advance_query)
1058                 seq_string[j] = Sequence_get_symbol(query, curr_query_pos+j);
1059             else /* advance_target */
1060                 seq_string[j] = Sequence_get_symbol(target, curr_target_pos+j);
1061             match_string[j] = match_string[0];
1062             gap_string[j] = gap_string[0];
1063             }
1064         seq_string[j] = match_string[j] = gap_string[j] = '\0';
1065         if(emitted_alphabet_type == Alphabet_Type_PROTEIN){
1066             strncpy(seq_string, Alphabet_aa2tla(seq_string[0]), 3);
1067             match_string[2] = match_string[1] = match_string[0];
1068             gap_string[2] = gap_string[1] = gap_string[0];
1069             seq_string[3] = match_string[3] = gap_string[3] = '\0';
1070             }
1071         if(advance_query)
1072             AlignmentView_add(av, seq_string,
1073                                   match_string,
1074                                   match_string,
1075                                   gap_string,
1076                                   gap_string,
1077                                   curr_query_pos, curr_target_pos);
1078         else
1079             AlignmentView_add(av, gap_string,
1080                                   gap_string,
1081                                   match_string,
1082                                   match_string,
1083                                   seq_string,
1084                                   curr_query_pos, curr_target_pos);
1085         curr_query_pos += advance_query;
1086         curr_target_pos += advance_target;
1087         }
1088     return;
1089     }
1090 
AlignmentView_add_label_operation(AlignmentView * av,C4_Transition * transition,gint total_length,Sequence * query,Sequence * target,gint query_pos,gint target_pos,Submat * dna_submat,Submat * protein_submat,Translate * translate,gboolean next_has_same_label,C4_Transition ** last_match)1091 static void AlignmentView_add_label_operation(AlignmentView *av,
1092                 C4_Transition *transition,
1093                 gint total_length, Sequence *query, Sequence *target,
1094                 gint query_pos, gint target_pos,
1095                 Submat *dna_submat, Submat *protein_submat,
1096                 Translate *translate,
1097                 gboolean next_has_same_label, C4_Transition **last_match){
1098     switch(transition->label){
1099         case C4_Label_NONE:
1100             g_assert(!transition->advance_query);
1101             g_assert(!transition->advance_target);
1102             break;
1103         case C4_Label_MATCH:
1104             (*last_match) = transition;
1105             AlignmentView_add_MATCH(av, transition,
1106                     total_length, query, target,
1107                     query_pos, target_pos,
1108                     dna_submat, protein_submat, translate);
1109             break;
1110         case C4_Label_GAP:
1111             AlignmentView_add_GAP(av,
1112                     transition->advance_query,
1113                     transition->advance_target,
1114                     total_length, query, target,
1115                     query_pos, target_pos, translate);
1116             break;
1117         case C4_Label_5SS: /*fallthrough*/
1118             AlignmentView_add_SPLICE_SITE(av,
1119                     transition->advance_query,
1120                     transition->advance_target,
1121                     total_length, query, target,
1122                     query_pos, target_pos, TRUE, (*last_match));
1123             break;
1124         case C4_Label_3SS:
1125             AlignmentView_add_SPLICE_SITE(av,
1126                     transition->advance_query,
1127                     transition->advance_target,
1128                     total_length, query, target,
1129                     query_pos, target_pos, FALSE, (*last_match));
1130             break;
1131         case C4_Label_INTRON:
1132             av->intron_advance_query += (transition->advance_query
1133                                          * total_length);
1134             av->intron_advance_target += (transition->advance_target
1135                                          * total_length);
1136             if(!next_has_same_label){
1137                 AlignmentView_add_INTRON(av,
1138                         av->intron_advance_query,
1139                         av->intron_advance_target,
1140                         query, target,
1141                         query_pos, target_pos, (*last_match));
1142                 av->intron_advance_query = 0;
1143                 av->intron_advance_target = 0;
1144                 }
1145             break;
1146         case C4_Label_NER:
1147             av->ner_advance_query += (transition->advance_query
1148                                      * total_length);
1149             av->ner_advance_target += (transition->advance_target
1150                                      * total_length);
1151             if(!next_has_same_label){
1152                 AlignmentView_add_NER(av, av->ner_advance_query,
1153                                           av->ner_advance_target,
1154                                           query_pos, target_pos);
1155                 av->ner_advance_query = 0;
1156                 av->ner_advance_target = 0;
1157                 }
1158             break;
1159         case C4_Label_SPLIT_CODON:
1160             g_assert(total_length == 1);
1161             AlignmentView_add_SPLIT_CODON(av, query, target,
1162                               transition->advance_query,
1163                               transition->advance_target,
1164                               query_pos, target_pos,
1165                               protein_submat, translate);
1166             break;
1167         case C4_Label_FRAMESHIFT:
1168             AlignmentView_add_FRAMESHIFT(av,
1169                     transition->advance_query,
1170                     transition->advance_target,
1171                     total_length, query, target,
1172                     query_pos, target_pos, translate);
1173             break;
1174         default:
1175             g_error("AlignmentView cannot use label [%s]",
1176                     C4_Label_get_name(transition->label));
1177             break;
1178         }
1179     return;
1180     }
1181 
AlignmentView_prepare(AlignmentView * av,Alignment * alignment,Sequence * query,Sequence * target,Submat * dna_submat,Submat * protein_submat,Translate * translate)1182 static void AlignmentView_prepare(AlignmentView *av,
1183                        Alignment *alignment,
1184                        Sequence *query, Sequence *target,
1185                        Submat *dna_submat, Submat *protein_submat,
1186                        Translate *translate){
1187     register gint i;
1188     register gint query_pos = alignment->region->query_start,
1189                   target_pos = alignment->region->target_start;
1190     register AlignmentPosition *apos;
1191     register AlignmentOperation *ao, *prev_ao;
1192     register gint total_length;
1193     C4_Transition *last_match = NULL;
1194     apos = g_new(AlignmentPosition, 1);
1195     apos->query_pos  = alignment->region->query_start-1;
1196     apos->target_pos = alignment->region->target_start-1;
1197     g_ptr_array_add(av->row_marker, apos);
1198     prev_ao = alignment->operation_list->pdata[0];
1199     total_length = prev_ao->length;
1200     for(i = 1; i < alignment->operation_list->len; i++){
1201         ao = alignment->operation_list->pdata[i];
1202         if(prev_ao->transition == ao->transition){
1203             /* group */
1204             total_length += ao->length;
1205         } else {
1206             /* report */
1207             AlignmentView_add_label_operation(av, prev_ao->transition,
1208                 total_length, query, target, query_pos, target_pos,
1209                 dna_submat, protein_submat, translate,
1210                 (prev_ao->transition->label == ao->transition->label),
1211                 &last_match);
1212             query_pos += (prev_ao->transition->advance_query
1213                         * total_length);
1214             target_pos += (prev_ao->transition->advance_target
1215                          * total_length);
1216             /* start new */
1217             prev_ao = ao;
1218             total_length = prev_ao->length;
1219             }
1220         }
1221     AlignmentView_add_label_operation(av, prev_ao->transition,
1222         total_length, query, target, query_pos, target_pos,
1223         dna_submat, protein_submat, translate, FALSE,
1224         &last_match);
1225     apos = g_new(AlignmentPosition, 1);
1226     apos->query_pos  = Region_query_end(alignment->region)-1;
1227     apos->target_pos = Region_target_end(alignment->region)-1;
1228     g_ptr_array_add(av->row_marker, apos);
1229     return;
1230     }
1231 
AlignmentView_string_is_empty(GString * str,gint pos,gint width)1232 static gboolean AlignmentView_string_is_empty(GString *str, gint pos,
1233                                               gint width){
1234     register gint i;
1235     for(i = 0; i < width; i++)
1236         if(str->str[pos+i] != ' ')
1237             return FALSE;
1238     return TRUE;
1239     }
1240 
AlignmentView_prepare_seq(GString * outer,GString * inner,gint pos,gint width)1241 static void AlignmentView_prepare_seq(GString *outer, GString *inner,
1242                                       gint pos, gint width){
1243     register gint i;
1244     register gchar t;
1245     for(i = 0; i < width; i++){
1246         if(inner->str[pos+i] == ' '){ /* Swap empty */
1247              t = inner->str[pos+i];
1248              inner->str[pos+i] = outer->str[pos+i];
1249              outer->str[pos+i] = t;
1250              continue;
1251              }
1252         if(inner->str[pos+i] == '^') /* Replace padding */
1253             inner->str[pos+i] = ' ';
1254         }
1255     return;
1256     }
1257 
AlignmentView_replace_padding(GString * str,gint pos,gint width)1258 static void AlignmentView_replace_padding(GString *str, gint pos, gint width){
1259     register gint i;
1260     for(i = 0; i < width; i++){
1261         if(str->str[pos+i] == '^')
1262             str->str[pos+i] = ' ';
1263         }
1264     return;
1265     }
1266 
AlignmentView_display_row(AlignmentView * av,gint row,gint pos,gint width,gint maxposlen,Sequence * query,Sequence * target,FILE * fp)1267 static void AlignmentView_display_row(AlignmentView *av,
1268             gint row, gint pos, gint width, gint maxposlen,
1269             Sequence *query, Sequence *target, FILE *fp){
1270     register AlignmentPosition *apos1 = av->row_marker->pdata[row],
1271                                *apos2 = av->row_marker->pdata[row+1];
1272     register gint p1q, p1t, p2q, p2t;
1273     register Alignment_ArgumentSet *aas
1274         = Alignment_ArgumentSet_create(NULL);
1275     register gboolean show_inner_query = FALSE,
1276                       show_inner_target = FALSE;
1277     p1q = apos1->query_pos+1;
1278     p2q = apos2->query_pos+1;
1279     p1t = apos1->target_pos+1;
1280     p2t = apos2->target_pos+1;
1281     if(aas->forward_strand_coords){
1282         if(query->strand == Sequence_Strand_REVCOMP){
1283             p1q = query->len - p1q - 1;
1284             p2q = query->len - p2q + 1;
1285             }
1286         if(target->strand == Sequence_Strand_REVCOMP){
1287             p1t = target->len - p1t - 1;
1288             p2t = target->len - p2t + 1;
1289             }
1290         }
1291     if(av->inner_query
1292     && (!AlignmentView_string_is_empty(av->inner_query, pos, width))){
1293         show_inner_query = TRUE;
1294         AlignmentView_prepare_seq(av->outer_query,
1295                                   av->inner_query, pos, width);
1296         }
1297     if(av->inner_target
1298     && (!AlignmentView_string_is_empty(av->inner_target, pos, width))){
1299         show_inner_target = TRUE;
1300         AlignmentView_prepare_seq(av->outer_target,
1301                                   av->inner_target, pos, width);
1302         }
1303     AlignmentView_replace_padding(av->outer_query, pos, width);
1304     AlignmentView_replace_padding(av->outer_target, pos, width);
1305     fprintf(fp, " %*d : %.*s : %*d\n", maxposlen, p1q+1,
1306             width, av->outer_query->str+pos,
1307             maxposlen, p2q);
1308     if(show_inner_query)
1309         fprintf(fp, " %*s   %.*s\n", maxposlen, " ", width,
1310                                  av->inner_query->str+pos);
1311     fprintf(fp, " %*s   %.*s\n", maxposlen, " ", width,
1312                              av->middle->str+pos);
1313     if(show_inner_target)
1314         fprintf(fp, " %*s   %.*s\n", maxposlen, " ", width,
1315                                  av->inner_target->str+pos);
1316     fprintf(fp, " %*d : %.*s : %*d\n", maxposlen, p1t+1,
1317             width, av->outer_target->str+pos,
1318             maxposlen, p2t);
1319     return;
1320     }
1321 
AlignmentView_display(AlignmentView * av,Sequence * query,Sequence * target,FILE * fp)1322 static void AlignmentView_display(AlignmentView *av,
1323                                   Sequence *query, Sequence *target,
1324                                   FILE *fp){
1325     register gint pos = 0, pause, row = 0;
1326     pause = av->outer_query->len-av->width;
1327     while(pos < pause){
1328         AlignmentView_display_row(av, row, pos, av->width,
1329                                   av->max_pos_len, query, target, fp);
1330         pos += av->width;
1331         row++;
1332         fprintf(fp, "\n");
1333         }
1334     AlignmentView_display_row(av, row, pos, av->outer_query->len-pos,
1335                               av->max_pos_len, query, target, fp);
1336     fprintf(fp, "\n");
1337     return;
1338     }
1339 
1340 /**/
1341 
Alignment_display(Alignment * alignment,Sequence * query,Sequence * target,Submat * dna_submat,Submat * protein_submat,Translate * translate,FILE * fp)1342 void Alignment_display(Alignment *alignment,
1343                        Sequence *query, Sequence *target,
1344                        Submat *dna_submat, Submat *protein_submat,
1345                        Translate *translate, FILE *fp){
1346     register AlignmentView *av = AlignmentView_create(alignment,
1347                                                       query, target);
1348     g_assert(alignment);
1349     g_assert(!alignment->model->is_open);
1350     /* Display header */
1351     fprintf(fp, "\n"
1352             "C4 Alignment:\n"
1353             "------------\n"
1354             "         Query: %s%s%s\n"
1355             "        Target: %s%s%s\n"
1356             "         Model: %s\n"
1357             "     Raw score: %d\n"
1358             "   Query range: %d -> %d\n"
1359             "  Target range: %d -> %d\n\n",
1360             query->id, query->def?" ":"", query->def?query->def:"",
1361             target->id, target->def?" ":"", target->def?target->def:"",
1362             alignment->model->name,
1363             alignment->score,
1364             Alignment_get_coordinate(alignment, query, target,
1365                                      TRUE, TRUE),
1366             Alignment_get_coordinate(alignment, query, target,
1367                                      TRUE, FALSE),
1368             Alignment_get_coordinate(alignment, query, target,
1369                                      FALSE, TRUE),
1370             Alignment_get_coordinate(alignment, query, target,
1371                                      FALSE, FALSE));
1372     AlignmentView_prepare(av, alignment, query, target,
1373                           dna_submat, protein_submat, translate);
1374     AlignmentView_display(av, query, target, fp);
1375     AlignmentView_destroy(av);
1376     return;
1377     }
1378 
1379 /**/
1380 
Alignment_get_equivalenced_matching(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_id,gpointer user_data)1381 static gint Alignment_get_equivalenced_matching(Alignment *alignment,
1382               Sequence *query, Sequence *target,
1383               Translate *translate, gboolean report_id,
1384               gpointer user_data){
1385     register gint i, j, match = 0;
1386     register AlignmentOperation *ao;
1387     register gint query_pos = alignment->region->query_start,
1388                   target_pos = alignment->region->target_start;
1389     register gchar qy_symbol, tg_symbol;
1390     for(i = 0; i < alignment->operation_list->len; i++){
1391         ao = alignment->operation_list->pdata[i];
1392         if(ao->transition->label == C4_Label_MATCH){
1393             for(j = 0; j < ao->length; j++){
1394                 qy_symbol = Alignment_match_get_symbol(query, query_pos,
1395                                        ao->transition->advance_query,
1396                                        translate);
1397                 tg_symbol = Alignment_match_get_symbol(target,
1398                                        target_pos,
1399                                        ao->transition->advance_target,
1400                                        translate);
1401                 if(report_id){
1402                     if(toupper(qy_symbol) == toupper(tg_symbol))
1403                         match++;
1404                 } else { /* report similarity */
1405                     if(C4_Calc_score(ao->transition->calc, query_pos,
1406                                   target_pos, user_data) > 0)
1407                         match++;
1408                     }
1409                 query_pos += ao->transition->advance_query;
1410                 target_pos += ao->transition->advance_target;
1411                 }
1412         } else {
1413             query_pos += (ao->transition->advance_query
1414                           * ao->length);
1415             target_pos += (ao->transition->advance_target
1416                           * ao->length);
1417             }
1418         }
1419     return match;
1420     }
1421 /* Reports the %id over the equivalenced regions of the alignment
1422  */
1423 
Alignment_get_equivalenced_matching_region(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_id,gpointer user_data,gint exon_query_start,gint exon_query_end)1424 static gint Alignment_get_equivalenced_matching_region(Alignment *alignment,
1425               Sequence *query, Sequence *target,
1426               Translate *translate, gboolean report_id,
1427               gpointer user_data, gint exon_query_start, gint exon_query_end){
1428     register gint i, j, match = 0;
1429     register AlignmentOperation *ao;
1430     register gint query_pos = alignment->region->query_start,
1431                   target_pos = alignment->region->target_start;
1432     register gchar qy_symbol, tg_symbol;
1433     for(i = 0; i < alignment->operation_list->len; i++){
1434         ao = alignment->operation_list->pdata[i];
1435         if(ao->transition->label == C4_Label_MATCH){
1436             for(j = 0; j < ao->length; j++){
1437                 if(query_pos > exon_query_end)
1438                     return match;
1439                 if(query_pos >= exon_query_start){
1440                     qy_symbol = Alignment_match_get_symbol(query, query_pos,
1441                                            ao->transition->advance_query,
1442                                            translate);
1443                     tg_symbol = Alignment_match_get_symbol(target,
1444                                            target_pos,
1445                                            ao->transition->advance_target,
1446                                            translate);
1447                     if(report_id){
1448                         if(toupper(qy_symbol) == toupper(tg_symbol))
1449                             match++;
1450                     } else { /* report similarity */
1451                         if(C4_Calc_score(ao->transition->calc, query_pos,
1452                                       target_pos, user_data) > 0)
1453                             match++;
1454                         }
1455                     }
1456                 query_pos += ao->transition->advance_query;
1457                 target_pos += ao->transition->advance_target;
1458                 }
1459         } else {
1460             query_pos += (ao->transition->advance_query
1461                           * ao->length);
1462             target_pos += (ao->transition->advance_target
1463                           * ao->length);
1464             }
1465         }
1466     return match;
1467     }
1468 /* Reports the %id over the equivalenced regions of the alignment
1469  */
1470 
Alignment_get_equivalenced_total(Alignment * alignment)1471 static gint Alignment_get_equivalenced_total(Alignment *alignment){
1472     register gint i, total = 0;
1473     register AlignmentOperation *ao;
1474     for(i = 0; i < alignment->operation_list->len; i++){
1475         ao = alignment->operation_list->pdata[i];
1476         if(ao->transition->label == C4_Label_MATCH)
1477             total += ao->length;
1478         }
1479     return total;
1480     }
1481 
Alignment_get_equivalenced_total_region(Alignment * alignment,gint exon_query_start,gint exon_query_end)1482 static gint Alignment_get_equivalenced_total_region(Alignment *alignment,
1483                           gint exon_query_start, gint exon_query_end){
1484     register gint i, j, total = 0;
1485     register gint query_pos = alignment->region->query_start,
1486                   target_pos = alignment->region->target_start;
1487     register AlignmentOperation *ao;
1488     for(i = 0; i < alignment->operation_list->len; i++){
1489         ao = alignment->operation_list->pdata[i];
1490         if(ao->transition->label == C4_Label_MATCH){
1491             for(j = 0; j < ao->length; j++){
1492                 if(query_pos > exon_query_end)
1493                     return total;
1494                 if(query_pos >= exon_query_start)
1495                     total++;
1496                 query_pos += ao->transition->advance_query;
1497                 target_pos += ao->transition->advance_target;
1498                 }
1499         } else {
1500             query_pos += (ao->transition->advance_query
1501                           * ao->length);
1502             target_pos += (ao->transition->advance_target
1503                           * ao->length);
1504             }
1505         }
1506     return total;
1507     }
1508 /* FIXME: optimisation: should use gene object to avoid full alignment pass
1509  */
1510 
Alignment_get_percent_score_region(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_id,gpointer user_data,gint exon_query_start,gint exon_query_end)1511 static gfloat Alignment_get_percent_score_region(Alignment *alignment,
1512               Sequence *query, Sequence *target,
1513               Translate *translate, gboolean report_id, gpointer user_data,
1514               gint exon_query_start, gint exon_query_end){
1515     return (((gfloat)Alignment_get_equivalenced_matching_region(alignment,
1516                      query, target, translate, report_id, user_data,
1517                      exon_query_start, exon_query_end))
1518           / ((gfloat)Alignment_get_equivalenced_total_region(alignment,
1519                          exon_query_start, exon_query_end)))*100;
1520     }
1521 /* FIXME: should also count split codons */
1522 
Alignment_get_percent_score(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_id,gpointer user_data)1523 static gfloat Alignment_get_percent_score(Alignment *alignment,
1524               Sequence *query, Sequence *target,
1525               Translate *translate, gboolean report_id,
1526               gpointer user_data){
1527     return (((gfloat)Alignment_get_equivalenced_matching(alignment,
1528                      query, target, translate, report_id, user_data))
1529           / ((gfloat)Alignment_get_equivalenced_total(alignment)))*100;
1530     }
1531 /* FIXME: should also count split codons */
1532 
Alignment_get_match_score(Alignment * alignment,Sequence * query,Sequence * target,gpointer user_data)1533 static gint Alignment_get_match_score(Alignment *alignment,
1534                                       Sequence *query, Sequence *target,
1535                                       gpointer user_data){
1536     register gint i, j;
1537     register AlignmentOperation *ao;
1538     register C4_Score score = 0;
1539     register gint query_pos = alignment->region->query_start,
1540                   target_pos = alignment->region->target_start;
1541     for(i = 0;  i < alignment->operation_list->len; i++){
1542         ao = alignment->operation_list->pdata[i];
1543         for(j = 0; j < ao->length; j++){
1544             if(ao->transition->label == C4_Label_MATCH){
1545                 score += C4_Calc_score(ao->transition->calc, query_pos,
1546                                        target_pos, user_data);
1547                 }
1548             query_pos += ao->transition->advance_query;
1549             target_pos += ao->transition->advance_target;
1550             }
1551         }
1552     g_message("MATCH SCORE [%d]", score);
1553     return score;
1554     }
1555 
Alignment_get_self_match_score(Alignment * alignment,Sequence * query,Sequence * target,gpointer self_data)1556 static gint Alignment_get_self_match_score(Alignment *alignment,
1557                                      Sequence *query, Sequence *target,
1558                                      gpointer self_data){
1559     register gint i, j;
1560     register AlignmentOperation *ao;
1561     register C4_Score score = 0;
1562     register gint query_pos = alignment->region->query_start;
1563     for(i = 0; i < alignment->operation_list->len; i++){
1564         ao = alignment->operation_list->pdata[i];
1565         for(j = 0; j < ao->length; j++){
1566             if(ao->transition->label == C4_Label_MATCH){
1567                 /* Use self_data to find self-comparison scores */
1568                 score += C4_Calc_score(ao->transition->calc, query_pos,
1569                                        query_pos, self_data);
1570                 }
1571             query_pos += ao->transition->advance_query;
1572             }
1573         }
1574     g_message("SELF MATCH SCORE [%d]", score);
1575     return score;
1576     }
1577 
Alignment_get_percent_self(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gpointer user_data,gpointer self_data)1578 static gfloat Alignment_get_percent_self(Alignment *alignment,
1579               Sequence *query, Sequence *target,
1580               Translate *translate, gpointer user_data, gpointer self_data){
1581     return (((gfloat)Alignment_get_match_score(alignment,
1582                      query, target, user_data))
1583           / ((gfloat)Alignment_get_self_match_score(alignment, query, target,
1584                                                     self_data)))*100;
1585     }
1586 /* Returns score as a percentage of maximum possible
1587  * over the equivalenced transitions
1588  */
1589 
1590 /**/
1591 
Alignment_print_sugar_block(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)1592 static void Alignment_print_sugar_block(Alignment *alignment,
1593             Sequence *query, Sequence *target, FILE *fp){
1594     fprintf(fp, "%s %d %d %c %s %d %d %c %d",
1595              query->id,
1596              Alignment_get_coordinate(alignment, query, target,
1597                                       TRUE, TRUE),
1598              Alignment_get_coordinate(alignment, query, target,
1599                                       TRUE, FALSE),
1600              Sequence_get_strand_as_char(query),
1601              target->id,
1602              Alignment_get_coordinate(alignment, query, target,
1603                                       FALSE, TRUE),
1604              Alignment_get_coordinate(alignment, query, target,
1605                                       FALSE, FALSE),
1606              Sequence_get_strand_as_char(target),
1607              alignment->score);
1608     return;
1609     }
1610 
Alignment_get_cigar_type(AlignmentOperation * ao,gint * move)1611 static gchar Alignment_get_cigar_type(AlignmentOperation *ao,
1612                                       gint *move){
1613     if(!ao->transition->advance_query){
1614         (*move) = ao->transition->advance_target * ao->length;
1615         return 'D';
1616         }
1617     if(!ao->transition->advance_target){
1618         (*move) = ao->transition->advance_query * ao->length;
1619         return 'I';
1620         }
1621     (*move) = MAX(ao->transition->advance_query,
1622                   ao->transition->advance_target) * ao->length;
1623     return 'M';
1624     }
1625 
Alignment_print_cigar_block(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)1626 static void Alignment_print_cigar_block(Alignment *alignment,
1627             Sequence *query, Sequence *target, FILE *fp){
1628     register gint i = 0;
1629     register gchar *gap = "";
1630     register AlignmentOperation *ao;
1631     register gchar type, next_type;
1632     gint move, next_move;
1633     ao = alignment->operation_list->pdata[i];
1634     type = Alignment_get_cigar_type(ao, &move);
1635     for(i = 1; i < alignment->operation_list->len; i++){
1636         ao = alignment->operation_list->pdata[i];
1637         next_type = Alignment_get_cigar_type(ao, &next_move);
1638         if(type == next_type){
1639             move += next_move;
1640         } else {
1641             if(move)
1642                 fprintf(fp, "%s%c %d", gap, type, move);
1643             move = next_move;
1644             type = next_type;
1645             gap = " ";
1646             }
1647         }
1648     if(move)
1649         fprintf(fp, "%s%c %d", gap, type, move);
1650     return;
1651     }
1652 
Alignment_print_vulgar_block(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)1653 static void Alignment_print_vulgar_block(Alignment *alignment,
1654             Sequence *query, Sequence *target, FILE *fp){
1655     register gint i;
1656     register gchar *gap = "";
1657     register C4_Label curr_label;
1658     register gint curr_advance_query, curr_advance_target;
1659     register AlignmentOperation *ao;
1660     register gboolean curr_is_codon = FALSE;
1661     ao = alignment->operation_list->pdata[0];
1662     curr_label = ao->transition->label;
1663     curr_advance_query = (ao->transition->advance_query * ao->length);
1664     curr_advance_target = (ao->transition->advance_target * ao->length);
1665     for(i = 1; i < alignment->operation_list->len; i++){
1666         ao = alignment->operation_list->pdata[i];
1667         if((ao->transition->label == curr_label)
1668          && (curr_advance_query || (!ao->transition->advance_query))
1669          && (curr_advance_target || (!ao->transition->advance_target))
1670          && (curr_is_codon == ((ao->transition->advance_query == 3)
1671                                && (ao->transition->advance_target == 3)))){
1672             curr_advance_query += (ao->transition->advance_query
1673                                   * ao->length);
1674             curr_advance_target += (ao->transition->advance_target
1675                                   * ao->length);
1676         } else {
1677             switch(curr_label){
1678                 case C4_Label_NONE:
1679                     break;
1680                 case C4_Label_MATCH:
1681                     g_assert(curr_advance_query && curr_advance_target);
1682                     fprintf(fp, "%s%c %d %d", gap, curr_is_codon?'C':'M',
1683                                               curr_advance_query,
1684                                               curr_advance_target);
1685                     gap = " ";
1686                     break;
1687                 case C4_Label_GAP:
1688                     g_assert(curr_advance_query || curr_advance_target);
1689                     g_assert(!  (curr_advance_query
1690                               && curr_advance_target));
1691                     fprintf(fp, "%sG %d %d", gap, curr_advance_query,
1692                                                   curr_advance_target);
1693                     gap = " ";
1694                     break;
1695                 case C4_Label_NER:
1696                     fprintf(fp, "%sN %d %d", gap, curr_advance_query,
1697                                                   curr_advance_target);
1698                     gap = " ";
1699                     break;
1700                 case C4_Label_5SS:
1701                     fprintf(fp, "%s5 %d %d", gap, curr_advance_query,
1702                                                   curr_advance_target);
1703                     gap = " ";
1704                     break;
1705                 case C4_Label_3SS:
1706                     fprintf(fp, "%s3 %d %d", gap, curr_advance_query,
1707                                                   curr_advance_target);
1708                     gap = " ";
1709                     break;
1710                 case C4_Label_INTRON:
1711                     fprintf(fp, "%sI %d %d", gap, curr_advance_query,
1712                                                   curr_advance_target);
1713                     gap = " ";
1714                     break;
1715                 case C4_Label_SPLIT_CODON:
1716                     fprintf(fp, "%sS %d %d", gap, curr_advance_query,
1717                                                   curr_advance_target);
1718                     gap = " ";
1719                     break;
1720                 case C4_Label_FRAMESHIFT:
1721                     fprintf(fp, "%sF %d %d", gap, curr_advance_query,
1722                                                    curr_advance_target);
1723                     gap = " ";
1724                     break;
1725                 default:
1726                     g_error("Unknown C4_Label [%d]", curr_label);
1727                     break;
1728                 }
1729             curr_label = ao->transition->label;
1730             curr_is_codon = ((ao->transition->advance_query == 3)
1731                           && (ao->transition->advance_target == 3));
1732             curr_advance_query = (ao->transition->advance_query
1733                                   * ao->length);
1734             curr_advance_target = (ao->transition->advance_target
1735                                   * ao->length);
1736             }
1737         }
1738     return;
1739     }
1740 
1741 typedef enum {
1742     Alignment_RYO_TOKEN_STRING,
1743     /**/
1744     Alignment_RYO_TOKEN_ID,           /* [QT] */
1745     Alignment_RYO_TOKEN_DEF,          /* [QT] */
1746     Alignment_RYO_TOKEN_LEN,          /* [QT] */
1747     Alignment_RYO_TOKEN_STRAND,       /* [QT] */
1748     Alignment_RYO_TOKEN_SEQ,          /* [QT] */
1749     Alignment_RYO_TOKEN_TYPE,         /* [QT] */
1750     /**/
1751     Alignment_RYO_TOKEN_ALIGN_BEGIN,  /* [QT] */
1752     Alignment_RYO_TOKEN_ALIGN_END,    /* [QT] */
1753     Alignment_RYO_TOKEN_ALIGN_LEN,    /* [QT] */
1754     Alignment_RYO_TOKEN_ALIGN_SEQ,    /* [QT] */
1755     /**/
1756     Alignment_RYO_TOKEN_CODING_BEGIN, /* [QT] */
1757     Alignment_RYO_TOKEN_CODING_END,   /* [QT] */
1758     Alignment_RYO_TOKEN_CODING_LEN,   /* [QT] */
1759     Alignment_RYO_TOKEN_CODING_SEQ,   /* [QT] */
1760     /**/
1761     Alignment_RYO_TOKEN_SCORE,
1762     Alignment_RYO_TOKEN_MODEL_NAME,
1763     Alignment_RYO_TOKEN_RANK,
1764     Alignment_RYO_TOKEN_PERCENT_ID,
1765     Alignment_RYO_TOKEN_PERCENT_SIMILARITY,
1766     Alignment_RYO_TOKEN_PERCENT_SELF,
1767     Alignment_RYO_TOKEN_GENE_ORIENTATION,
1768     Alignment_RYO_TOKEN_EQUIVALENCED_TOTAL,
1769     Alignment_RYO_TOKEN_EQUIVALENCED_IDENTCAL,
1770     Alignment_RYO_TOKEN_EQUIVALENCED_SIMILAR,
1771     Alignment_RYO_TOKEN_EQUIVALENCED_MISMATCHES,
1772     /**/
1773     Alignment_RYO_TOKEN_SUGAR_BLOCK,
1774     Alignment_RYO_TOKEN_CIGAR_BLOCK,
1775     Alignment_RYO_TOKEN_VULGAR_BLOCK,
1776     /**/
1777     Alignment_RYO_TOKEN_PTO_OPEN,
1778     Alignment_RYO_TOKEN_PTO_CLOSE,
1779     /**/
1780     Alignment_RYO_TOKEN_PTO_SEQ,     /* [QT] */
1781     Alignment_RYO_TOKEN_PTO_ADVANCE, /* [QT] */
1782     Alignment_RYO_TOKEN_PTO_BEGIN,   /* [QT] */
1783     Alignment_RYO_TOKEN_PTO_END,     /* [QT] */
1784     /**/
1785     Alignment_RYO_TOKEN_PTO_NAME,
1786     Alignment_RYO_TOKEN_PTO_SCORE,
1787     Alignment_RYO_TOKEN_PTO_LABEL
1788 } Alignment_RYO_Token;
1789 
1790 typedef struct {
1791     Alignment_RYO_Token  token;
1792                 GString *str;      /* For TOKEN_STRING */
1793                gboolean  on_query; /* For [QT] */
1794 } Alignment_RYO_ComplexToken;
1795 
Alignment_RYO_ComplexToken_create(Alignment_RYO_Token token,gchar * str,gboolean on_query)1796 static Alignment_RYO_ComplexToken *Alignment_RYO_ComplexToken_create(
1797        Alignment_RYO_Token token, gchar *str, gboolean on_query){
1798     register Alignment_RYO_ComplexToken *rct
1799      = g_new(Alignment_RYO_ComplexToken, 1);
1800     rct->token = token;
1801     if(str)
1802         rct->str = g_string_new(str);
1803     else
1804         rct->str = NULL;
1805     rct->on_query = on_query;
1806     return rct;
1807     }
1808 
Alignment_RYO_ComplexToken_destroy(Alignment_RYO_ComplexToken * rct)1809 static void Alignment_RYO_ComplexToken_destroy(
1810             Alignment_RYO_ComplexToken *rct){
1811     if(rct->str)
1812         g_string_free(rct->str, TRUE);
1813     g_free(rct);
1814     return;
1815     }
1816 
Alignment_RYO_add_string(GPtrArray * token_list,gchar * str)1817 static void Alignment_RYO_add_string(GPtrArray *token_list, gchar *str){
1818     register Alignment_RYO_ComplexToken *rct = NULL;
1819     if(token_list->len){
1820         rct = token_list->pdata[token_list->len-1];
1821         if(rct->token != Alignment_RYO_TOKEN_STRING)
1822             rct = NULL;
1823         }
1824     if(rct){
1825         g_assert(rct->str);
1826         g_string_append(rct->str, str);
1827     } else {
1828         rct = Alignment_RYO_ComplexToken_create(
1829               Alignment_RYO_TOKEN_STRING, str, FALSE);
1830         g_ptr_array_add(token_list, rct);
1831         }
1832     return;
1833     }
1834 
Alignment_RYO_add_token(GPtrArray * token_list,Alignment_RYO_Token token,gboolean on_query)1835 static void Alignment_RYO_add_token(GPtrArray *token_list,
1836                                     Alignment_RYO_Token token,
1837                                     gboolean on_query){
1838     register Alignment_RYO_ComplexToken *rct
1839      = Alignment_RYO_ComplexToken_create(token, NULL, on_query);
1840     g_ptr_array_add(token_list, rct);
1841     return;
1842     }
1843 
Alignment_RYO_tokenise_strand(GPtrArray * token_list,gchar * format,gint pos,gboolean on_query)1844 static gint Alignment_RYO_tokenise_strand(GPtrArray *token_list,
1845                            gchar *format, gint pos, gboolean on_query){
1846     register gint move = 1;
1847     switch(format[pos]){
1848         case 'i':
1849             Alignment_RYO_add_token(token_list,
1850                       Alignment_RYO_TOKEN_ID, on_query);
1851             break;
1852         case 'd':
1853             Alignment_RYO_add_token(token_list,
1854                       Alignment_RYO_TOKEN_DEF, on_query);
1855             break;
1856         case 'l':
1857             Alignment_RYO_add_token(token_list,
1858                       Alignment_RYO_TOKEN_LEN, on_query);
1859             break;
1860         case 's':
1861             Alignment_RYO_add_token(token_list,
1862                       Alignment_RYO_TOKEN_SEQ, on_query);
1863             break;
1864         case 'S':
1865             Alignment_RYO_add_token(token_list,
1866                       Alignment_RYO_TOKEN_STRAND, on_query);
1867             break;
1868         case 't':
1869             Alignment_RYO_add_token(token_list,
1870                       Alignment_RYO_TOKEN_TYPE, on_query);
1871             break;
1872         case 'a':
1873             switch(format[pos+1]){
1874                 case 'b':
1875                     Alignment_RYO_add_token(token_list,
1876                         Alignment_RYO_TOKEN_ALIGN_BEGIN, on_query);
1877                     break;
1878                 case 'e':
1879                     Alignment_RYO_add_token(token_list,
1880                         Alignment_RYO_TOKEN_ALIGN_END, on_query);
1881                     break;
1882                 case 'l':
1883                     Alignment_RYO_add_token(token_list,
1884                         Alignment_RYO_TOKEN_ALIGN_LEN, on_query);
1885                     break;
1886                 case 's':
1887                     Alignment_RYO_add_token(token_list,
1888                         Alignment_RYO_TOKEN_ALIGN_SEQ, on_query);
1889                     break;
1890                 default:
1891                     g_error("Unknown [%%%c%c%c] in format string [%s]",
1892                         format[pos-1], format[pos], format[pos+1],
1893                         format);
1894                     break;
1895                 }
1896             move++;
1897             break;
1898         case 'c':
1899             switch(format[pos+1]){
1900                 case 'b':
1901                     Alignment_RYO_add_token(token_list,
1902                         Alignment_RYO_TOKEN_CODING_BEGIN, on_query);
1903                     break;
1904                 case 'e':
1905                     Alignment_RYO_add_token(token_list,
1906                         Alignment_RYO_TOKEN_CODING_END, on_query);
1907                     break;
1908                 case 'l':
1909                     Alignment_RYO_add_token(token_list,
1910                         Alignment_RYO_TOKEN_CODING_LEN, on_query);
1911                     break;
1912                 case 's':
1913                     Alignment_RYO_add_token(token_list,
1914                         Alignment_RYO_TOKEN_CODING_SEQ, on_query);
1915                     break;
1916 
1917                 default:
1918                     g_error("Unknown [%%%c%c%c] in format string [%s]",
1919                         format[pos-1], format[pos], format[pos+1],
1920                         format);
1921                     break;
1922                 }
1923             move++;
1924             break;
1925         default:
1926             g_error("Unknown [%%%c%c] in format string [%s]",
1927                     format[pos-1], format[pos], format);
1928             break;
1929         }
1930     return move;
1931     }
1932 
Alignment_RYO_tokenise_PTO_strand(GPtrArray * token_list,gchar * format,gint pos,gboolean on_query)1933 static gint Alignment_RYO_tokenise_PTO_strand(GPtrArray *token_list,
1934                            gchar *format, gint pos, gboolean on_query){
1935     register gint move = 1;
1936     switch(format[pos]){
1937         case 's':
1938             Alignment_RYO_add_token(token_list,
1939                         Alignment_RYO_TOKEN_PTO_SEQ, on_query);
1940             break;
1941         case 'a':
1942             Alignment_RYO_add_token(token_list,
1943                         Alignment_RYO_TOKEN_PTO_ADVANCE, on_query);
1944             break;
1945         case 'b':
1946             Alignment_RYO_add_token(token_list,
1947                         Alignment_RYO_TOKEN_PTO_BEGIN, on_query);
1948             break;
1949         case 'e':
1950             Alignment_RYO_add_token(token_list,
1951                         Alignment_RYO_TOKEN_PTO_END, on_query);
1952             break;
1953         default:
1954             g_error("Unknown [%%%c%c%c] in format string [%s]",
1955                     format[pos-2], format[pos-1], format[pos], format);
1956             break;
1957         }
1958     return move;
1959     }
1960 
Alignment_RYO_tokenise_PTO(GPtrArray * token_list,gchar * format,gint pos)1961 static gint Alignment_RYO_tokenise_PTO(GPtrArray *token_list,
1962                            gchar *format, gint pos){
1963     register gint move = 1;
1964     switch(format[pos]){
1965         case 'q':
1966             move += Alignment_RYO_tokenise_PTO_strand(token_list,
1967                         format, pos+1, TRUE);
1968             break;
1969         case 't':
1970             move += Alignment_RYO_tokenise_PTO_strand(token_list,
1971                         format, pos+1, FALSE);
1972             break;
1973         case 'n':
1974             Alignment_RYO_add_token(token_list,
1975                         Alignment_RYO_TOKEN_PTO_NAME, FALSE);
1976             break;
1977         case 's':
1978             Alignment_RYO_add_token(token_list,
1979                         Alignment_RYO_TOKEN_PTO_SCORE, FALSE);
1980             break;
1981         case 'l':
1982             Alignment_RYO_add_token(token_list,
1983                         Alignment_RYO_TOKEN_PTO_LABEL, FALSE);
1984             break;
1985         default:
1986             g_error("Unknown [%%%c%c] in format string [%s]",
1987                     format[pos-1], format[pos], format);
1988             break;
1989         }
1990     return move;
1991     }
1992 
Alignment_RYO_tokenise(gchar * format)1993 static GPtrArray *Alignment_RYO_tokenise(gchar *format){
1994     register gint i;
1995     register GPtrArray *token_list = g_ptr_array_new();
1996     gchar mini_str[2];
1997     mini_str[1] = '\0';
1998     for(i = 0; format[i]; i++){
1999         switch(format[i]){
2000             case '\\':
2001                 switch(format[i+1]){
2002                     case '\\':
2003                         Alignment_RYO_add_string(token_list, "\\");
2004                         break;
2005                     case 'n':
2006                         Alignment_RYO_add_string(token_list, "\n");
2007                         break;
2008                     case 't':
2009                         Alignment_RYO_add_string(token_list, "\t");
2010                         break;
2011                     case '{':
2012                         Alignment_RYO_add_string(token_list, "{");
2013                         break;
2014                     case '}':
2015                         Alignment_RYO_add_string(token_list, "}");
2016                         break;
2017                     default:
2018                         g_error("Unknown [\\%c] in ryo string [%s]",
2019                                 format[i+1], format);
2020                         break;
2021                     }
2022                 i++;
2023                 break;
2024             case '%':
2025                 switch(format[i+1]){
2026                     case '%':
2027                         Alignment_RYO_add_string(token_list, "%");
2028                         break;
2029                     case 'q':
2030                         i += Alignment_RYO_tokenise_strand(token_list,
2031                                 format, i+2, TRUE);
2032                         break;
2033                     case 't':
2034                         i += Alignment_RYO_tokenise_strand(token_list,
2035                                 format, i+2, FALSE);
2036                         break;
2037                     case 's':
2038                         Alignment_RYO_add_token(token_list,
2039                             Alignment_RYO_TOKEN_SCORE, FALSE);
2040                         break;
2041                     case 'm':
2042                         Alignment_RYO_add_token(token_list,
2043                             Alignment_RYO_TOKEN_MODEL_NAME, FALSE);
2044                         break;
2045                     case 'r':
2046                         Alignment_RYO_add_token(token_list,
2047                             Alignment_RYO_TOKEN_RANK, FALSE);
2048                         break;
2049                     case 'p':
2050                         switch(format[i+2]){
2051                             case 'i':
2052                                 Alignment_RYO_add_token(token_list,
2053                                  Alignment_RYO_TOKEN_PERCENT_ID, FALSE);
2054                                 break;
2055                             case 's':
2056                                 Alignment_RYO_add_token(token_list,
2057                                  Alignment_RYO_TOKEN_PERCENT_SIMILARITY,
2058                                  FALSE);
2059                                 break;
2060                             case 'S':
2061                                 Alignment_RYO_add_token(token_list,
2062                                  Alignment_RYO_TOKEN_PERCENT_SELF,
2063                                  FALSE);
2064                                 break;
2065                             default:
2066                                 g_error(
2067                                    "Unknown [%%%c%c] in format string",
2068                                    format[i+1], format[i+2]);
2069                                 break;
2070                             }
2071                         i++;
2072                         break;
2073                     case 'e':
2074                         switch(format[i+2]){
2075                             case 't':
2076                                 Alignment_RYO_add_token(token_list,
2077                                  Alignment_RYO_TOKEN_EQUIVALENCED_TOTAL,
2078                                  FALSE);
2079                                 break;
2080                             case 'i':
2081                                 Alignment_RYO_add_token(token_list,
2082                              Alignment_RYO_TOKEN_EQUIVALENCED_IDENTCAL,
2083                                  FALSE);
2084                                 break;
2085                             case 's':
2086                                 Alignment_RYO_add_token(token_list,
2087                              Alignment_RYO_TOKEN_EQUIVALENCED_SIMILAR,
2088                                  FALSE);
2089                                 break;
2090                             case 'm':
2091                                 Alignment_RYO_add_token(token_list,
2092                             Alignment_RYO_TOKEN_EQUIVALENCED_MISMATCHES,
2093                                  FALSE);
2094                                 break;
2095                             default:
2096                                 g_error(
2097                                    "Unknown [%%%c%c] in format string",
2098                                    format[i+1], format[i+2]);
2099                                 break;
2100                             }
2101                         i++;
2102                         break;
2103                     case 'g':
2104                         Alignment_RYO_add_token(token_list,
2105                           Alignment_RYO_TOKEN_GENE_ORIENTATION, FALSE);
2106                         break;
2107                     case 'S':
2108                         Alignment_RYO_add_token(token_list,
2109                           Alignment_RYO_TOKEN_SUGAR_BLOCK, FALSE);
2110                         break;
2111                     case 'C':
2112                         Alignment_RYO_add_token(token_list,
2113                           Alignment_RYO_TOKEN_CIGAR_BLOCK, FALSE);
2114                         break;
2115                     case 'V':
2116                         Alignment_RYO_add_token(token_list,
2117                           Alignment_RYO_TOKEN_VULGAR_BLOCK, FALSE);
2118                         break;
2119                     case 'P':
2120                         i += Alignment_RYO_tokenise_PTO(token_list,
2121                                 format, i+2);
2122                         break;
2123                     default:
2124                         g_error("Unknown [%%%c] in format string [%s]",
2125                                 format[i+1], format);
2126                         break;
2127                     }
2128                 i++;
2129                 break;
2130             case '{':
2131                 Alignment_RYO_add_token(token_list,
2132                   Alignment_RYO_TOKEN_PTO_OPEN, FALSE);
2133                 break;
2134             case '}':
2135                 Alignment_RYO_add_token(token_list,
2136                   Alignment_RYO_TOKEN_PTO_CLOSE, FALSE);
2137                 break;
2138             default:
2139                 mini_str[0] = format[i];
2140                 Alignment_RYO_add_string(token_list, mini_str);
2141                 break;
2142             }
2143         }
2144     return token_list;
2145     }
2146 /*  %[qt][idlsSt] {query,target}{id,def,len,seq,Strand,type]
2147  *  %[qt]a[bels] {query,target}align{begin,end,len,seq}
2148  *  %s score
2149  *  %m model_name
2150  *  %r rank
2151  *  %p[isS] percent {id,similarity,self}
2152  *  %e[tid] equivalenced {total,identical,similar}
2153  *  %g gene_orientation
2154  *  %S sugar block
2155  *  %C cigar block
2156  *  %V vulgar block
2157  *  %% %
2158  *  \n newline
2159  *  \t tab
2160  *  \\ \
2161  *
2162  *  \{ {
2163  *  \} }
2164  *  { start per-transition section
2165  *  }   end per-transition section
2166  *  %P[qt][sabe] per-transition-output{query,target}
2167  *                                    {seq,advance,begin,end}
2168  *  %P[nsl] per-transition-output{name,score,label}
2169  */
2170 
Alignment_RYO_token_list_destroy(GPtrArray * token_list)2171 static void Alignment_RYO_token_list_destroy(GPtrArray *token_list){
2172     register gint i;
2173     register Alignment_RYO_ComplexToken *rct;
2174     for(i = 0; i < token_list->len; i++){
2175         rct = token_list->pdata[i];
2176         Alignment_RYO_ComplexToken_destroy(rct);
2177         }
2178     g_ptr_array_free(token_list, TRUE);
2179     return;
2180     }
2181 
2182 /**/
2183 
2184 typedef struct {
2185              Alignment *alignment;
2186     AlignmentOperation *ao;
2187                   gint  operation_id;
2188                   gint  operation_pos;
2189                   gint  query_pos;
2190                   gint  target_pos;
2191               C4_Score *cell;
2192               C4_Score *start_cell;
2193               C4_Score  score;
2194               gpointer  user_data; /* No ref stored */
2195 } Alignment_Position;
2196 
Alignment_Position_create(Alignment * alignment,gpointer user_data)2197 static Alignment_Position *Alignment_Position_create(
2198                     Alignment *alignment, gpointer user_data){
2199     register Alignment_Position *ap = g_new(Alignment_Position, 1);
2200     register gint i;
2201     register gint shadow_total
2202                     = 1 + alignment->model->total_shadow_designations;
2203     register C4_Calc *calc;
2204     ap->alignment = Alignment_share(alignment);
2205     ap->ao = alignment->operation_list->pdata[0];
2206     ap->operation_id = 0;
2207     ap->operation_pos = 0;
2208     ap->query_pos = alignment->region->query_start;
2209     ap->target_pos = alignment->region->target_start;
2210     ap->cell = g_new0(C4_Score, shadow_total);
2211     ap->user_data = user_data;
2212     if(alignment->model->start_state->cell_start_func){
2213         ap->start_cell = alignment->model->start_state->cell_start_func
2214             (ap->query_pos, ap->target_pos, user_data);
2215         for(i = 0; i < shadow_total; i++)
2216             ap->cell[i] = ap->start_cell[i];
2217         }
2218     if(alignment->model->init_func)
2219         alignment->model->init_func(alignment->region, user_data);
2220     for(i = 0; i < alignment->model->calc_list->len; i++){
2221         calc = alignment->model->calc_list->pdata[i];
2222         if(calc && calc->init_func)
2223             calc->init_func(alignment->region, user_data);
2224         }
2225     return ap;
2226     }
2227 
Alignment_Position_destroy(Alignment_Position * ap)2228 static void Alignment_Position_destroy(Alignment_Position *ap){
2229     register gint i;
2230     register C4_Calc *calc;
2231     for(i = 0; i < ap->alignment->model->calc_list->len; i++){
2232         calc = ap->alignment->model->calc_list->pdata[i];
2233         if(calc && calc->exit_func)
2234             calc->exit_func(ap->alignment->region, ap->user_data);
2235         }
2236     if(ap->alignment->model->exit_func)
2237         ap->alignment->model->exit_func(ap->alignment->region,
2238                                         ap->user_data);
2239     Alignment_destroy(ap->alignment);
2240     g_free(ap);
2241     return;
2242     }
2243 
Alignment_Position_set_shadows(Alignment_Position * ap)2244 static void Alignment_Position_set_shadows(Alignment_Position *ap){
2245     register gint i;
2246     register C4_State *state = ap->ao->transition->input;
2247     register C4_Shadow *shadow;
2248     for(i = 0; i < state->src_shadow_list->len; i++){
2249         shadow = state->src_shadow_list->pdata[i];
2250         ap->cell[1 + shadow->designation] = shadow->start_func(
2251                 ap->query_pos, ap->target_pos,
2252                 ap->user_data);
2253         }
2254     for(i = 0; i < ap->ao->transition->dst_shadow_list->len; i++){
2255         shadow = ap->ao->transition->dst_shadow_list->pdata[i];
2256         shadow->end_func(ap->cell[1 + shadow->designation],
2257                 ap->query_pos + ap->ao->transition->advance_query,
2258                 ap->target_pos + ap->ao->transition->advance_target,
2259                 ap->user_data);
2260         }
2261     return;
2262     }
2263 
Alignment_Position_next(Alignment_Position * ap)2264 static gboolean Alignment_Position_next(Alignment_Position *ap){
2265     ap->query_pos += ap->ao->transition->advance_query;
2266     ap->target_pos += ap->ao->transition->advance_target;
2267     /* Use next transition */
2268     if(++ap->operation_pos < ap->ao->length){
2269         Alignment_Position_set_shadows(ap);
2270         return TRUE;
2271         }
2272     /* Use next operation */
2273     if(++ap->operation_id < ap->alignment->operation_list->len){
2274         ap->operation_pos = 0;
2275         ap->ao = ap->alignment->operation_list->pdata[ap->operation_id];
2276         Alignment_Position_set_shadows(ap);
2277         return TRUE;
2278         }
2279     return FALSE;
2280     }
2281 
2282 /**/
2283 
2284 typedef struct {
2285         gint  begin;
2286         gint  end;
2287     Sequence *seq;
2288 } Alignment_Coding;
2289 
Alignment_Coding_create(Alignment * alignment,Sequence * query,Sequence * target,gpointer user_data,gboolean on_query)2290 static Alignment_Coding *Alignment_Coding_create(Alignment *alignment,
2291                                                  Sequence *query,
2292                                                  Sequence *target,
2293                                                  gpointer user_data,
2294                                                  gboolean on_query){
2295     register Alignment_Coding *ac = g_new(Alignment_Coding, 1);
2296     register Alignment_Position *ap
2297         = Alignment_Position_create(alignment, user_data);
2298     register Sequence *src = NULL;
2299     register GString *seq = g_string_sized_new(1024);
2300     register gint i, advance, pos;
2301     if(on_query){
2302         g_assert(query->alphabet->type == Alphabet_Type_DNA);
2303         src = query;
2304     } else {
2305         g_assert(target->alphabet->type == Alphabet_Type_DNA);
2306         src = target;
2307         }
2308     while(Alignment_Position_next(ap)){
2309         if(on_query){
2310             advance = ap->ao->transition->advance_query;
2311             pos = ap->query_pos;
2312         } else {
2313             advance = ap->ao->transition->advance_target;
2314             pos = ap->target_pos;
2315             }
2316         switch(ap->ao->transition->label){
2317             case C4_Label_MATCH:
2318                 if(advance == 3){
2319                     if(!seq->len)
2320                         ac->begin = pos;
2321                     g_string_append_c(seq, Sequence_get_symbol(src, pos));
2322                     g_string_append_c(seq, Sequence_get_symbol(src, pos+1));
2323                     g_string_append_c(seq, Sequence_get_symbol(src, pos+2));
2324                     ac->end = pos;
2325                     }
2326                 break;
2327             case C4_Label_SPLIT_CODON:
2328                 g_assert(advance);
2329                 g_assert(seq);
2330                 g_assert(seq->len);
2331                 for(i = 0; i < advance; i++)
2332                     g_string_append_c(seq, Sequence_get_symbol(src, pos+i));
2333                 break;
2334             case C4_Label_GAP:
2335                 if(advance == 3){
2336                     g_assert(seq);
2337                     g_assert(seq->len);
2338                     g_string_append_c(seq, Sequence_get_symbol(src, pos));
2339                     g_string_append_c(seq, Sequence_get_symbol(src, pos+1));
2340                     g_string_append_c(seq, Sequence_get_symbol(src, pos+2));
2341                     }
2342                 break;
2343             default: /* ignored */
2344                 break;
2345             }
2346         }
2347     Alignment_Position_destroy(ap);
2348     ac->seq = Sequence_create("coding", NULL, seq->str, seq->len,
2349                               src->strand, src->alphabet);
2350     g_string_free(seq, TRUE);
2351     return ac;
2352     }
2353 
Alignment_Coding_destroy(Alignment_Coding * ac)2354 static void Alignment_Coding_destroy(Alignment_Coding *ac){
2355     Sequence_destroy(ac->seq);
2356     g_free(ac);
2357     return;
2358     }
2359 
2360 /**/
2361 
Alignment_RYO_token_list_print(GPtrArray * token_list,Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gint rank,gpointer user_data,gpointer self_data,FILE * fp)2362 static void Alignment_RYO_token_list_print(GPtrArray *token_list,
2363             Alignment *alignment, Sequence *query, Sequence *target,
2364             Translate *translate, gint rank,
2365             gpointer user_data, gpointer self_data, FILE *fp){
2366     register gint i, j, pto_start = -1;
2367     register Alignment_RYO_ComplexToken *rct;
2368     register Sequence *seq, *subseq;
2369     register Alignment_Position *ap = NULL;
2370     register Alignment_Coding *qy_ac = NULL, *tg_ac = NULL, *ac;
2371     for(i = 0; i < token_list->len; i++){
2372         rct = token_list->pdata[i];
2373         seq = rct->on_query?query:target;
2374         switch(rct->token){
2375             case Alignment_RYO_TOKEN_STRING:
2376                 fprintf(fp, "%s", rct->str->str);
2377                 break;
2378             /**/
2379             case Alignment_RYO_TOKEN_ID:
2380                 fprintf(fp, "%s", seq->id);
2381                 break;
2382             case Alignment_RYO_TOKEN_DEF:
2383                 if(seq->def)
2384                     fprintf(fp, "%s", seq->def);
2385                 break;
2386             case Alignment_RYO_TOKEN_LEN:
2387                 fprintf(fp, "%d", seq->len);
2388                 break;
2389             case Alignment_RYO_TOKEN_STRAND:
2390                 fprintf(fp, "%c", Sequence_get_strand_as_char(seq));
2391                 break;
2392             case Alignment_RYO_TOKEN_SEQ:
2393                 Sequence_print_fasta_block(seq, fp);
2394                 break;
2395             case Alignment_RYO_TOKEN_TYPE:
2396                 fprintf(fp, "%s",
2397                         Alphabet_Type_get_name(seq->alphabet->type));
2398                 break;
2399             /**/
2400             case Alignment_RYO_TOKEN_ALIGN_BEGIN:
2401                 fprintf(fp, "%d",
2402                     Alignment_get_coordinate(alignment, query, target,
2403                         rct->on_query, TRUE));
2404                 break;
2405             case Alignment_RYO_TOKEN_ALIGN_END:
2406                 fprintf(fp, "%d",
2407                     Alignment_get_coordinate(alignment, query, target,
2408                         rct->on_query, FALSE));
2409                 break;
2410             case Alignment_RYO_TOKEN_ALIGN_LEN:
2411                 fprintf(fp, "%d",
2412                         rct->on_query?alignment->region->query_length
2413                                      :alignment->region->target_length);
2414                 break;
2415             case Alignment_RYO_TOKEN_ALIGN_SEQ:
2416                 if(rct->on_query){
2417                     subseq = Sequence_subseq(seq,
2418                                 alignment->region->query_start,
2419                                 alignment->region->query_length);
2420                 } else {
2421                     subseq = Sequence_subseq(seq,
2422                                 alignment->region->target_start,
2423                                 alignment->region->target_length);
2424                     }
2425                 Sequence_print_fasta_block(subseq, fp);
2426                 Sequence_destroy(subseq);
2427                 break;
2428             /**/
2429             case Alignment_RYO_TOKEN_CODING_BEGIN:
2430                 ac = rct->on_query?qy_ac:tg_ac;
2431                 if(!ac)
2432                     ac = Alignment_Coding_create(alignment,
2433                             query, target, user_data, rct->on_query);
2434                 fprintf(fp, "%d", Alignment_convert_coordinate(alignment,
2435                               query, target,
2436                               ac->begin, ac->begin, rct->on_query));
2437                 break;
2438             case Alignment_RYO_TOKEN_CODING_END:
2439                 ac = rct->on_query?qy_ac:tg_ac;
2440                 if(!ac)
2441                     ac = Alignment_Coding_create(alignment,
2442                             query, target, user_data, rct->on_query);
2443                 fprintf(fp, "%d", Alignment_convert_coordinate(alignment,
2444                               query, target,
2445                               ac->end, ac->end, rct->on_query));
2446                 break;
2447             case Alignment_RYO_TOKEN_CODING_LEN:
2448                 ac = rct->on_query?qy_ac:tg_ac;
2449                 if(!ac)
2450                     ac = Alignment_Coding_create(alignment,
2451                             query, target, user_data, rct->on_query);
2452                 fprintf(fp, "%d", ac->seq->len);
2453                 break;
2454             case Alignment_RYO_TOKEN_CODING_SEQ:
2455                 ac = rct->on_query?qy_ac:tg_ac;
2456                 if(!ac)
2457                     ac = Alignment_Coding_create(alignment,
2458                             query, target, user_data, rct->on_query);
2459                 Sequence_print_fasta_block(ac->seq, fp);
2460                 break;
2461             /**/
2462             case Alignment_RYO_TOKEN_SCORE:
2463                 fprintf(fp, "%d", alignment->score);
2464                 break;
2465             case Alignment_RYO_TOKEN_MODEL_NAME:
2466                 fprintf(fp, "%s", alignment->model->name);
2467                 break;
2468             case Alignment_RYO_TOKEN_RANK:
2469                 if(rank == -1)
2470                     fprintf(fp, "%%_EXONERATE_BESTN_RANK_%%");
2471                 else
2472                     fprintf(fp, "%d", rank);
2473                 break;
2474             /**/
2475             case Alignment_RYO_TOKEN_PERCENT_ID:
2476                 fprintf(fp, "%2.2f", Alignment_get_percent_score(
2477                                  alignment, query, target,
2478                                  translate, TRUE, user_data));
2479                 break;
2480             case Alignment_RYO_TOKEN_PERCENT_SIMILARITY:
2481                 fprintf(fp, "%2.2f", Alignment_get_percent_score(
2482                                  alignment, query, target,
2483                                  translate, FALSE, user_data));
2484                 break;
2485             case Alignment_RYO_TOKEN_PERCENT_SELF:
2486                 fprintf(fp, "%2.2f", Alignment_get_percent_self(
2487                                  alignment, query, target,
2488                                  translate, user_data, self_data));
2489                 break;
2490             case Alignment_RYO_TOKEN_GENE_ORIENTATION:
2491                 fprintf(fp, "%c",
2492                      Alignment_get_gene_orientation(alignment));
2493                 break;
2494             /**/
2495             case Alignment_RYO_TOKEN_EQUIVALENCED_TOTAL:
2496                 fprintf(fp, "%d", Alignment_get_equivalenced_total(
2497                                  alignment));
2498                 break;
2499             case Alignment_RYO_TOKEN_EQUIVALENCED_IDENTCAL:
2500                 fprintf(fp, "%d", Alignment_get_equivalenced_matching(
2501                                  alignment, query, target,
2502                                  translate, TRUE, user_data));
2503                 break;
2504             case Alignment_RYO_TOKEN_EQUIVALENCED_SIMILAR:
2505                 fprintf(fp, "%d", Alignment_get_equivalenced_matching(
2506                                  alignment, query, target,
2507                                  translate, FALSE, user_data));
2508                 break;
2509             case Alignment_RYO_TOKEN_EQUIVALENCED_MISMATCHES:
2510                 fprintf(fp, "%d",
2511                         Alignment_get_equivalenced_total(alignment)
2512                       - Alignment_get_equivalenced_matching(
2513                                  alignment, query, target,
2514                                  translate, TRUE, user_data));
2515                 break;
2516             /**/
2517             case Alignment_RYO_TOKEN_SUGAR_BLOCK:
2518                 Alignment_print_sugar_block(alignment, query, target, fp);
2519                 break;
2520             case Alignment_RYO_TOKEN_CIGAR_BLOCK:
2521                 Alignment_print_cigar_block(alignment, query, target, fp);
2522                 break;
2523             case Alignment_RYO_TOKEN_VULGAR_BLOCK:
2524                 Alignment_print_vulgar_block(alignment, query, target, fp);
2525                 break;
2526             /**/
2527             case Alignment_RYO_TOKEN_PTO_OPEN:
2528                 if(pto_start != -1)
2529                     g_error("Cannot nest PTO brackets");
2530                 pto_start = i;
2531                 ap = Alignment_Position_create(alignment, user_data);
2532                 break;
2533             case Alignment_RYO_TOKEN_PTO_CLOSE:
2534                 if(pto_start == -1)
2535                     g_error("No opening PTO bracket in --ryo string");
2536                 if(Alignment_Position_next(ap)){
2537                     i = pto_start;
2538                 } else {
2539                     pto_start = -1;
2540                     Alignment_Position_destroy(ap);
2541                     ap = NULL;
2542                     }
2543                 break;
2544             /**/
2545             case Alignment_RYO_TOKEN_PTO_SEQ:
2546                 g_assert(pto_start != -1);
2547                 if(rct->on_query){
2548                     if(ap->ao->transition->advance_query){
2549                         for(j = 0; j < ap->ao->transition->advance_query; j++)
2550                             fprintf(fp, "%c",
2551                                 Sequence_get_symbol(query, ap->query_pos+j));
2552                     } else {
2553                         fprintf(fp, "-");
2554                         }
2555                 } else {
2556                     if(ap->ao->transition->advance_target){
2557                         for(j = 0; j < ap->ao->transition->advance_target; j++)
2558                             fprintf(fp, "%c",
2559                                 Sequence_get_symbol(target, ap->target_pos+j));
2560                     } else {
2561                         fprintf(fp, "-");
2562                         }
2563                     }
2564                 break;
2565             case Alignment_RYO_TOKEN_PTO_ADVANCE:
2566                 fprintf(fp, "%d",
2567                    rct->on_query?ap->ao->transition->advance_query
2568                                 :ap->ao->transition->advance_target);
2569                 break;
2570             case Alignment_RYO_TOKEN_PTO_BEGIN:
2571                 fprintf(fp, "%d", Alignment_convert_coordinate(alignment,
2572                               query, target,
2573                               ap->query_pos, ap->target_pos, rct->on_query));
2574                 break;
2575             case Alignment_RYO_TOKEN_PTO_END:
2576                 fprintf(fp, "%d", Alignment_convert_coordinate(alignment,
2577                               query, target,
2578                               ap->query_pos+ap->ao->transition->advance_query,
2579                               ap->target_pos+ap->ao->transition->advance_target,
2580                               rct->on_query));
2581                 break;
2582             /**/
2583             case Alignment_RYO_TOKEN_PTO_NAME:
2584                 fprintf(fp, "%s", ap->ao->transition->name);
2585                 break;
2586             case Alignment_RYO_TOKEN_PTO_SCORE:
2587                 fprintf(fp, "%d",
2588                         C4_Calc_score(ap->ao->transition->calc,
2589                                       ap->query_pos,
2590                                       ap->target_pos, user_data));
2591                 break;
2592             case Alignment_RYO_TOKEN_PTO_LABEL:
2593                 fprintf(fp, "%s",
2594                     C4_Label_get_name(ap->ao->transition->label));
2595                 break;
2596             default:
2597                 g_error("Unknown token [%d]", rct->token);
2598                 break;
2599             }
2600         }
2601     if(pto_start != -1)
2602         g_error("No closing PTO bracket in --ryo string");
2603     if(qy_ac)
2604         Alignment_Coding_destroy(qy_ac);
2605     if(tg_ac)
2606         Alignment_Coding_destroy(tg_ac);
2607     return;
2608     }
2609 
Alignment_display_ryo(Alignment * alignment,Sequence * query,Sequence * target,gchar * format,Translate * translate,gint rank,gpointer user_data,gpointer self_data,FILE * fp)2610 void Alignment_display_ryo(Alignment *alignment,
2611         Sequence *query, Sequence *target, gchar *format,
2612         Translate *translate, gint rank,
2613         gpointer user_data, gpointer self_data, FILE *fp){
2614     register GPtrArray *token_list = Alignment_RYO_tokenise(format);
2615     Alignment_RYO_token_list_print(token_list, alignment,
2616                                    query, target, translate, rank,
2617                                    user_data, self_data, fp);
2618     Alignment_RYO_token_list_destroy(token_list);
2619     return;
2620     }
2621 
Alignment_display_sugar(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)2622 void Alignment_display_sugar(Alignment *alignment,
2623                              Sequence *query, Sequence *target, FILE *fp){
2624     fprintf(fp, "sugar: ");
2625     Alignment_print_sugar_block(alignment, query, target, fp);
2626     fprintf(fp, "\n");
2627     return;
2628     }
2629 /* sugar: simple ungapped alignment report
2630  */
2631 
Alignment_display_cigar(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)2632 void Alignment_display_cigar(Alignment *alignment,
2633                              Sequence *query, Sequence *target, FILE *fp){
2634     fprintf(fp, "cigar: ");
2635     Alignment_print_sugar_block(alignment, query, target, fp);
2636     fprintf(fp, " ");
2637     Alignment_print_cigar_block(alignment, query, target, fp);
2638     fprintf(fp, "\n");
2639     return;
2640     }
2641 /* cigar: concise idiosyncratic gapped alignment report
2642  * query_id query_start query_end query_strand \
2643  * target_id target_start target_end target_strand \
2644  * score \
2645  * <[MID] length> (m=Match i=Insert d=Delete)
2646  *
2647  */
2648 
Alignment_display_vulgar(Alignment * alignment,Sequence * query,Sequence * target,FILE * fp)2649 void Alignment_display_vulgar(Alignment *alignment,
2650                               Sequence *query, Sequence *target, FILE *fp){
2651     fprintf(fp, "vulgar: ");
2652     Alignment_print_sugar_block(alignment, query, target, fp);
2653     fprintf(fp, " ");
2654     Alignment_print_vulgar_block(alignment, query, target, fp);
2655     fprintf(fp, "\n");
2656     return;
2657     }
2658 
2659 /**/
2660 
Alignment_display_gff_header(Alignment * alignment,Sequence * query,Sequence * target,gboolean report_on_query,FILE * fp)2661 static void Alignment_display_gff_header(Alignment *alignment,
2662         Sequence *query, Sequence *target, gboolean report_on_query, FILE *fp){
2663     time_t timenow = time(NULL);
2664     register struct tm *tm_ptr = localtime(&timenow);
2665     gchar curr_time_str[12];
2666     strftime(curr_time_str, 30, "%Y-%m-%d", tm_ptr);
2667     fprintf(fp, "#\n"
2668                 "##gff-version 2\n"
2669                 "##source-version %s:%s %s\n"
2670                 "##date %s\n"
2671                 "##type %s\n"
2672                 "#\n",
2673                 PACKAGE, alignment->model->name, VERSION,
2674                 curr_time_str,
2675                 Alphabet_Type_get_name(report_on_query
2676                                       ? query->alphabet->type
2677                                       : target->alphabet->type));
2678     fprintf(fp, "#\n# seqname source feature start end"
2679                 " score strand frame attributes\n#\n");
2680     return;
2681     }
2682 
Alignment_display_gff_line(Alignment * alignment,Sequence * query,Sequence * target,gboolean report_on_query,gchar * feature,gint query_start,gint target_start,gint query_end,gint target_end,gboolean show_score,gint score,gboolean show_frame,gint frame,GPtrArray * attribute_list,FILE * fp)2683 static void Alignment_display_gff_line(Alignment *alignment,
2684                                        Sequence *query,
2685                                        Sequence *target,
2686                                        gboolean report_on_query,
2687                                        gchar *feature,
2688                                        gint query_start,
2689                                        gint target_start,
2690                                        gint query_end,
2691                                        gint target_end,
2692                                        gboolean show_score, gint score,
2693                                        gboolean show_frame, gint frame,
2694                                        GPtrArray *attribute_list, FILE *fp){
2695     /* <seqname> <source> <feature> <start> <end> <score> <strand> <frame>
2696      * [attributes] [comments]
2697      */
2698     register Sequence *seq;
2699     register gint i, start, end, t;
2700     register gchar *attribute;
2701     if(report_on_query){
2702         seq = query;
2703         start = query_start;
2704         end = query_end;
2705     } else {
2706         seq = target;
2707         start = target_start;
2708         end = target_end;
2709         }
2710     if(seq->strand == Sequence_Strand_REVCOMP){
2711         start = seq->len - start;
2712         end = seq->len - end;
2713         /* swap coords for gff output */
2714         t = start;
2715         start = end;
2716         end = t;
2717         }
2718     /* Sanity checks for valid GFF output */
2719     g_assert(start >= 0);
2720     g_assert(end <= seq->len);
2721     g_assert(start < end);
2722     g_assert((!show_frame) || ((frame >= 0) && (frame <= 2)));
2723     fprintf(fp, "%s\t%s:%s\t%s\t%d\t%d\t",
2724             seq->id, PACKAGE, alignment->model->name, feature, start+1, end);
2725     if(show_score)
2726         fprintf(fp, "%d", score);
2727     else
2728         fprintf(fp, ".");
2729     fprintf(fp, "\t%c\t", Sequence_get_strand_as_char(seq));
2730     if(show_frame)
2731         fprintf(fp, "%d", frame);
2732     else
2733         fprintf(fp, ".");
2734     fprintf(fp, "\t");
2735     if(attribute_list){
2736         for(i = 0; i < attribute_list->len; i++){
2737             attribute = attribute_list->pdata[i];
2738             fprintf(fp, "%s", attribute);
2739             if((i+1) < attribute_list->len)
2740                 fprintf(fp, " ; ");
2741             }
2742         }
2743     fprintf(fp, "\n");
2744     return;
2745     }
2746 
Alignment_free_attribute_list(GPtrArray * attribute_list)2747 static void Alignment_free_attribute_list(GPtrArray *attribute_list){
2748     register gint i;
2749     for(i = 0; i < attribute_list->len; i++)
2750         g_free(attribute_list->pdata[i]);
2751     g_ptr_array_free(attribute_list, TRUE);
2752     return;
2753     }
2754 
Alignment_display_gff_exon(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_on_query,gint query_pos,gint target_pos,gint exon_query_start,gint exon_target_start,gint exon_query_gap,gint exon_target_gap,gint exon_query_frameshift,gint exon_target_frameshift,gpointer user_data,FILE * fp)2755 static void Alignment_display_gff_exon(Alignment *alignment,
2756                                        Sequence *query,
2757                                        Sequence *target,
2758                                        Translate *translate,
2759                                        gboolean report_on_query,
2760                                        gint query_pos,
2761                                        gint target_pos,
2762                                        gint exon_query_start,
2763                                        gint exon_target_start,
2764                                        gint exon_query_gap,
2765                                        gint exon_target_gap,
2766                                        gint exon_query_frameshift,
2767                                        gint exon_target_frameshift,
2768                                        gpointer user_data, FILE *fp){
2769     register GPtrArray *attribute_list = g_ptr_array_new();
2770     g_ptr_array_add(attribute_list,
2771                     g_strdup_printf("insertions %d",
2772                                     report_on_query?exon_query_gap
2773                                                    :exon_target_gap));
2774     g_ptr_array_add(attribute_list,
2775                     g_strdup_printf("deletions %d",
2776                                     report_on_query?exon_target_gap
2777                                                    :exon_query_gap));
2778     g_ptr_array_add(attribute_list,
2779                     g_strdup_printf("identity %2.2f",
2780                                  Alignment_get_percent_score_region(alignment,
2781                                  query, target, translate, TRUE, user_data,
2782                                  exon_query_start, query_pos)));
2783     g_ptr_array_add(attribute_list,
2784                     g_strdup_printf("similarity %2.2f",
2785                                  Alignment_get_percent_score_region(alignment,
2786                                  query, target, translate, FALSE, user_data,
2787                                  exon_query_start, query_pos)));
2788     if(report_on_query){
2789         if(exon_query_frameshift)
2790             g_ptr_array_add(attribute_list,
2791                             g_strdup_printf("frameshifts %d",
2792                                             exon_query_frameshift));
2793     } else {
2794         if(exon_target_frameshift)
2795             g_ptr_array_add(attribute_list,
2796                             g_strdup_printf("frameshifts %d",
2797                                             exon_target_frameshift));
2798         }
2799     Alignment_display_gff_line(alignment, query, target,
2800                                report_on_query, "exon",
2801                                exon_query_start, exon_target_start,
2802                                query_pos, target_pos,
2803                                FALSE, 0, FALSE, 0, attribute_list, fp);
2804     Alignment_free_attribute_list(attribute_list);
2805     return;
2806     }
2807 
Alignment_display_gff_utr(Alignment * alignment,Sequence * query,Sequence * target,gboolean report_on_query,gboolean post_cds,gint cds_query_start,gint cds_target_start,gint cds_query_end,gint cds_target_end,gint exon_query_start,gint exon_target_start,gint query_pos,gint target_pos,FILE * fp)2808 static void Alignment_display_gff_utr(Alignment *alignment,
2809                             Sequence *query, Sequence *target,
2810                             gboolean report_on_query, gboolean post_cds,
2811                             gint cds_query_start, gint cds_target_start,
2812                             gint cds_query_end, gint cds_target_end,
2813                             gint exon_query_start, gint exon_target_start,
2814                             gint query_pos, gint target_pos, FILE *fp){
2815     register gint curr_cds_query_start, curr_cds_target_start,
2816                   curr_utr_query_start, curr_utr_target_start;
2817     if(post_cds){
2818         curr_utr_query_start = MAX(exon_query_start, cds_query_end);
2819         curr_utr_target_start = MAX(exon_target_start, cds_target_end);
2820         Alignment_display_gff_line(alignment, query, target,
2821                                report_on_query, "utr3",
2822                                curr_utr_query_start, curr_utr_target_start,
2823                                query_pos, target_pos,
2824                                FALSE, 0, FALSE, 0, NULL, fp);
2825     } else if(cds_query_start == -1){
2826         Alignment_display_gff_line(alignment, query, target,
2827                                report_on_query, "utr5",
2828                                exon_query_start, exon_target_start,
2829                                query_pos, target_pos,
2830                                FALSE, 0, FALSE, 0, NULL, fp);
2831     } else {
2832         curr_cds_query_start = MAX(cds_query_start,
2833                                    exon_query_start);
2834         curr_cds_target_start = MAX(cds_target_start,
2835                                     exon_target_start);
2836         Alignment_display_gff_line(alignment, query, target,
2837                                report_on_query, "cds",
2838                                curr_cds_query_start, curr_cds_target_start,
2839                                query_pos, target_pos,
2840                                FALSE, 0, FALSE, 0, NULL, fp);
2841         }
2842     return;
2843     }
2844 
Alignment_display_gff_gene(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_on_query,gint result_id,gpointer user_data,FILE * fp)2845 static void Alignment_display_gff_gene(Alignment *alignment,
2846         Sequence *query, Sequence *target, Translate *translate,
2847         gboolean report_on_query,
2848         gint result_id, gpointer user_data, FILE *fp){
2849     register gint i;
2850     register gint query_pos = alignment->region->query_start,
2851                   target_pos = alignment->region->target_start;
2852     register gint intron_id = 0, intron_length = 0;
2853     register gint exon_query_start = 0, exon_target_start = 0;
2854     register gint exon_query_gap = 0, exon_target_gap = 0;
2855     register gint exon_query_frameshift = 0, exon_target_frameshift = 0;
2856     register gint cds_query_start = -1, cds_target_start = -1,
2857                   cds_query_end = -1, cds_target_end = -1;
2858     register gboolean in_exon = FALSE, post_cds = FALSE;
2859     register AlignmentOperation *ao;
2860     register gchar gene_orientation
2861                = Alignment_get_gene_orientation(alignment);
2862     register GPtrArray *attribute_list = g_ptr_array_new();
2863     register gint curr_utr_query_start, curr_utr_target_start;
2864     /**/
2865     g_ptr_array_add(attribute_list,
2866                     g_strdup_printf("gene_id %d", result_id));
2867     g_ptr_array_add(attribute_list,
2868                     g_strdup_printf("sequence %s",
2869                         report_on_query?target->id:query->id));
2870     g_ptr_array_add(attribute_list,
2871                     g_strdup_printf("gene_orientation %c", gene_orientation));
2872     g_ptr_array_add(attribute_list,
2873                     g_strdup_printf("identity %2.2f",
2874                                  Alignment_get_percent_score(alignment,
2875                                  query, target, translate, TRUE, user_data)));
2876     g_ptr_array_add(attribute_list,
2877                     g_strdup_printf("similarity %2.2f",
2878                                  Alignment_get_percent_score(alignment,
2879                                  query, target, translate, FALSE, user_data)));
2880     Alignment_display_gff_line(alignment, query, target, report_on_query,
2881                                "gene",
2882                                alignment->region->query_start,
2883                                alignment->region->target_start,
2884                                Region_query_end(alignment->region),
2885                                Region_target_end(alignment->region),
2886                                TRUE, alignment->score,
2887                                FALSE, 0, attribute_list, fp);
2888     Alignment_free_attribute_list(attribute_list);
2889     for(i = 1; i < alignment->operation_list->len; i++){
2890         ao = alignment->operation_list->pdata[i];
2891         switch(ao->transition->label){
2892             case C4_Label_MATCH:
2893                 if((ao->transition->advance_query == 1)
2894                 && (ao->transition->advance_target == 1)){
2895                     if((cds_query_start != -1) && (!post_cds)){
2896                         Alignment_display_gff_line(alignment, query, target,
2897                                report_on_query, "cds",
2898                                exon_query_start, exon_target_start,
2899                                query_pos, target_pos,
2900                                FALSE, 0, FALSE, 0, NULL, fp);
2901                         post_cds = TRUE;
2902                         }
2903                 } else {
2904                     if(cds_query_start == -1){ /* First coding exon */
2905                         if(in_exon){ /* Have UTR5 */
2906                             Alignment_display_gff_line(alignment, query, target,
2907                                report_on_query, "utr5",
2908                                exon_query_start, exon_target_start,
2909                                query_pos, target_pos,
2910                                FALSE, 0, FALSE, 0, NULL, fp);
2911                             }
2912                         cds_query_start = query_pos;
2913                         cds_target_start = target_pos;
2914                         }
2915                     cds_query_end = query_pos
2916                                   + (ao->transition->advance_query
2917                                    * ao->length);
2918                     cds_target_end = target_pos
2919                                    + (ao->transition->advance_target
2920                                     * ao->length);
2921                     }
2922                 /*fallthrough*/
2923             case C4_Label_SPLIT_CODON:
2924                 if(!in_exon){
2925                     exon_query_start = query_pos;
2926                     exon_target_start = target_pos;
2927                     exon_query_gap = 0;
2928                     exon_target_gap = 0;
2929                     exon_query_frameshift = 0;
2930                     exon_target_frameshift = 0;
2931                     in_exon = TRUE;
2932                     }
2933                 break;
2934             case C4_Label_NONE:
2935                 break;
2936             case C4_Label_GAP:
2937                 exon_query_gap += (ao->transition->advance_query
2938                                    * ao->length);
2939                 exon_target_gap += (ao->transition->advance_target
2940                                    * ao->length);
2941                 break;
2942             case C4_Label_5SS:
2943                 if(report_on_query){
2944                     g_assert(ao->transition->advance_query == 2);
2945                     g_assert(ao->transition->advance_target == 0);
2946                 } else {
2947                     g_assert(ao->transition->advance_query == 0);
2948                     g_assert(ao->transition->advance_target == 2);
2949                     }
2950                 if(in_exon){
2951                     Alignment_display_gff_utr(alignment,
2952                             query, target, report_on_query, post_cds,
2953                             cds_query_start, cds_target_start,
2954                             cds_query_end, cds_target_end,
2955                             exon_query_start, exon_target_start,
2956                             query_pos, target_pos, fp);
2957                     Alignment_display_gff_exon(alignment,
2958                           query, target, translate, report_on_query,
2959                           query_pos, target_pos,
2960                           exon_query_start, exon_target_start,
2961                           exon_query_gap, exon_target_gap,
2962                           exon_query_frameshift,
2963                           exon_target_frameshift, user_data, fp);
2964                     in_exon = FALSE;
2965                     }
2966                 attribute_list = g_ptr_array_new();
2967                 g_ptr_array_add(attribute_list,
2968                                 g_strdup_printf("intron_id %d", intron_id+1));
2969                 g_ptr_array_add(attribute_list,
2970                                 g_strdup_printf("splice_site \"%c%c\"",
2971                    report_on_query?Sequence_get_symbol(query, query_pos)
2972                                   :Sequence_get_symbol(target, target_pos),
2973                    report_on_query?Sequence_get_symbol(query, query_pos+1)
2974                                   :Sequence_get_symbol(target, target_pos+1)));
2975                 Alignment_display_gff_line(alignment, query, target,
2976                                report_on_query, "splice5",
2977                                query_pos, target_pos,
2978                                query_pos+2, target_pos+2,
2979                                FALSE, 0, FALSE, 0, attribute_list, fp);
2980                 Alignment_free_attribute_list(attribute_list);
2981                 intron_length = 0;
2982                 break;
2983             case C4_Label_3SS:
2984                 if(report_on_query){
2985                     g_assert(ao->transition->advance_query == 2);
2986                     g_assert(ao->transition->advance_target == 0);
2987                 } else {
2988                     g_assert(ao->transition->advance_query == 0);
2989                     g_assert(ao->transition->advance_target == 2);
2990                     }
2991                 if(in_exon){
2992                     Alignment_display_gff_utr(alignment,
2993                             query, target, report_on_query, post_cds,
2994                             cds_query_start, cds_target_start,
2995                             cds_query_end, cds_target_end,
2996                             exon_query_start, exon_target_start,
2997                             query_pos, target_pos, fp);
2998                     Alignment_display_gff_exon(alignment,
2999                           query, target, translate, report_on_query,
3000                           query_pos, target_pos,
3001                           exon_query_start, exon_target_start,
3002                           exon_query_gap, exon_target_gap,
3003                           exon_query_frameshift,
3004                           exon_target_frameshift, user_data, fp);
3005                     in_exon = FALSE;
3006                     }
3007                 if(gene_orientation == '+'){
3008                     attribute_list = g_ptr_array_new();
3009                     g_ptr_array_add(attribute_list,
3010                                     g_strdup_printf("intron_id %d", ++intron_id));
3011                     Alignment_display_gff_line(alignment, query, target,
3012                                    report_on_query, "intron",
3013                                    query_pos-intron_length-2,
3014                                    target_pos-intron_length-2,
3015                                    query_pos+2, target_pos+2,
3016                                    FALSE, 0, FALSE, 0, attribute_list, fp);
3017                     Alignment_free_attribute_list(attribute_list);
3018                     }
3019                 attribute_list = g_ptr_array_new();
3020                 g_ptr_array_add(attribute_list,
3021                                 g_strdup_printf("intron_id %d", intron_id-1));
3022                 g_ptr_array_add(attribute_list,
3023                                 g_strdup_printf("splice_site \"%c%c\"",
3024                    report_on_query?Sequence_get_symbol(query, query_pos)
3025                                   :Sequence_get_symbol(target, target_pos),
3026                    report_on_query?Sequence_get_symbol(query, query_pos+1)
3027                                   :Sequence_get_symbol(target, target_pos+1)));
3028                 Alignment_display_gff_line(alignment, query, target,
3029                                report_on_query, "splice3",
3030                                query_pos, target_pos,
3031                                query_pos+2, target_pos+2,
3032                                FALSE, 0, FALSE, 0, attribute_list, fp);
3033                 Alignment_free_attribute_list(attribute_list);
3034                 intron_length = 0;
3035                 break;
3036             case C4_Label_INTRON:
3037                 if(report_on_query){
3038                     g_assert(ao->transition->advance_query == 1);
3039                     g_assert(ao->transition->advance_target == 0);
3040                 } else {
3041                     g_assert(ao->transition->advance_query == 0);
3042                     g_assert(ao->transition->advance_target == 1);
3043                     }
3044                 intron_length += ao->length;
3045                 break;
3046             case C4_Label_FRAMESHIFT:
3047                 exon_query_frameshift
3048                     += (ao->transition->advance_query * ao->length);
3049                 exon_target_frameshift
3050                     += (ao->transition->advance_target * ao->length);
3051                 break;
3052             case C4_Label_NER:
3053                 g_error("Unexpected NER for gff gene output");
3054                 break;
3055             default:
3056                 g_error("Unknown C4_Label [%s]",
3057                         C4_Label_get_name(ao->transition->label));
3058                 break;
3059             }
3060         query_pos += (ao->transition->advance_query * ao->length);
3061         target_pos += (ao->transition->advance_target * ao->length);
3062         }
3063     if(in_exon){
3064         if(cds_query_end != -1){
3065             if(cds_query_end != query_pos){ /* Have 3' UTR */
3066                 curr_utr_query_start = MAX(exon_query_start, cds_query_end);
3067                 curr_utr_target_start = MAX(exon_target_start, cds_target_end);
3068                 Alignment_display_gff_line(alignment, query, target,
3069                                report_on_query, "utr3b",
3070                                curr_utr_query_start,
3071                                curr_utr_target_start,
3072                                query_pos, target_pos,
3073                                FALSE, 0, FALSE, 0, NULL, fp);
3074             } else {
3075                 Alignment_display_gff_line(alignment, query, target,
3076                                report_on_query, "cds",
3077                                exon_query_start, exon_target_start,
3078                                query_pos, target_pos,
3079                                FALSE, 0, FALSE, 0, NULL, fp);
3080                 }
3081             }
3082         Alignment_display_gff_exon(alignment,
3083               query, target, translate, report_on_query,
3084               query_pos, target_pos,
3085               exon_query_start, exon_target_start,
3086               exon_query_gap, exon_target_gap,
3087               exon_query_frameshift, exon_target_frameshift, user_data, fp);
3088         }
3089     return;
3090     }
3091 
Alignment_display_gff_similarity(Alignment * alignment,Sequence * query,Sequence * target,gboolean report_on_query,gint result_id,FILE * fp)3092 static void Alignment_display_gff_similarity(Alignment *alignment,
3093         Sequence *query, Sequence *target, gboolean report_on_query,
3094         gint result_id, FILE *fp){
3095     register gint i, qp, tp;
3096     register gint query_pos = alignment->region->query_start,
3097                   target_pos = alignment->region->target_start;
3098     register AlignmentOperation *ao;
3099     register GPtrArray *attribute_list = g_ptr_array_new();
3100     g_ptr_array_add(attribute_list,
3101                     g_strdup_printf("alignment_id %d", result_id));
3102     if(report_on_query)
3103         g_ptr_array_add(attribute_list,
3104                     g_strdup_printf("Target %s", target->id));
3105     else
3106         g_ptr_array_add(attribute_list,
3107                     g_strdup_printf("Query %s", query->id));
3108     /* Align <seq_start> <target_start> [<length>] ; */
3109     for(i = 1; i < alignment->operation_list->len; i++){
3110         ao = alignment->operation_list->pdata[i];
3111         switch(ao->transition->label){
3112             case C4_Label_MATCH:
3113                 qp = query_pos;
3114                 tp = target_pos;
3115                 if(query->strand == Sequence_Strand_REVCOMP)
3116                     qp = query->len - qp;
3117                 if(target->strand == Sequence_Strand_REVCOMP)
3118                     tp = target->len - tp;
3119                 if(report_on_query){
3120                     g_ptr_array_add(attribute_list,
3121                      g_strdup_printf("Align %d %d %d", qp+1, tp+1,
3122                                      ao->length*ao->transition->advance_query));
3123                 } else {
3124                     g_ptr_array_add(attribute_list,
3125                      g_strdup_printf("Align %d %d %d", tp+1, qp+1,
3126                                      ao->length*ao->transition->advance_target));
3127                     }
3128                 break;
3129             case C4_Label_NONE:
3130             case C4_Label_GAP:
3131             case C4_Label_NER:
3132             case C4_Label_5SS:
3133             case C4_Label_3SS:
3134             case C4_Label_INTRON:
3135             case C4_Label_SPLIT_CODON:
3136             case C4_Label_FRAMESHIFT:
3137                 break;
3138             default:
3139                 g_error("Unknown C4_Label [%s]",
3140                         C4_Label_get_name(ao->transition->label));
3141                 break;
3142             }
3143         query_pos += (ao->transition->advance_query * ao->length);
3144         target_pos += (ao->transition->advance_target * ao->length);
3145         }
3146     Alignment_display_gff_line(alignment, query, target, report_on_query,
3147                                "similarity",
3148                                alignment->region->query_start,
3149                                alignment->region->target_start,
3150                                Region_query_end(alignment->region),
3151                                Region_target_end(alignment->region),
3152                                TRUE, alignment->score,
3153                                FALSE, 0, attribute_list, fp);
3154     Alignment_free_attribute_list(attribute_list);
3155     return;
3156     }
3157 
3158 /**/
3159 
Alignment_display_gff(Alignment * alignment,Sequence * query,Sequence * target,Translate * translate,gboolean report_on_query,gboolean report_on_genomic,gint result_id,gpointer user_data,FILE * fp)3160 void Alignment_display_gff(Alignment *alignment,
3161                            Sequence *query, Sequence *target,
3162                            Translate *translate,
3163                            gboolean report_on_query,
3164                            gboolean report_on_genomic,
3165                            gint result_id, gpointer user_data, FILE *fp){
3166     fprintf(fp, "# --- START OF GFF DUMP ---\n#\n");
3167     Alignment_display_gff_header(alignment, query, target,
3168                                  report_on_query, fp);
3169     if(report_on_genomic){
3170         Alignment_display_gff_gene(alignment, query, target, translate,
3171                                    report_on_query, result_id, user_data, fp);
3172         }
3173     Alignment_display_gff_similarity(alignment, query, target,
3174                                      report_on_query, result_id, fp);
3175     fprintf(fp, "# --- END OF GFF DUMP ---\n#\n");
3176     return;
3177     }
3178 
3179 /**/
3180 
3181 /*
3182 features: gene,intron,exon,splice[35],similarity
3183 */
3184 
3185 /**/
3186 
3187 #ifndef G_DISABLE_ASSERT
Alignment_has_valid_alignment(Alignment * alignment,gpointer user_data)3188 static gboolean Alignment_has_valid_alignment(Alignment *alignment,
3189                                               gpointer user_data){
3190     register gint query_pos = alignment->region->query_start,
3191                   target_pos = alignment->region->target_start;
3192     register C4_Score score;
3193     register C4_Calc *calc;
3194     register AlignmentOperation *alignment_operation;
3195     register C4_State *state;
3196     register C4_Transition *transition;
3197     register C4_Shadow *shadow;
3198     register gint i, j, k, t;
3199     register C4_Score *start_cell, *cell
3200      = g_new0(C4_Score, 1+alignment->model->total_shadow_designations);
3201 /* FIXME: temp */
3202 /* #define DEBUG */
3203 #ifdef DEBUG
3204     g_message("Check model [%s] align score [%d]", alignment->model->name,
3205             alignment->score);
3206     for(i = 0; i < alignment->model->transition_list->len; i++){
3207         transition = alignment->model->transition_list->pdata[i];
3208         g_message("Check transition (%d) [%s] has dst_shad[%d] (%s:%s)",
3209                 i, transition->name, transition->dst_shadow_list->len,
3210                 transition->input->name, transition->output->name);
3211         }
3212     Region_print(alignment->region, "Alignment_has_valid_alignment");
3213 #endif /* DEBUG */
3214     if(alignment->model->init_func)
3215         alignment->model->init_func(alignment->region, user_data);
3216     for(i = 0; i < alignment->model->calc_list->len; i++){
3217         calc = alignment->model->calc_list->pdata[i];
3218         if(calc && calc->init_func)
3219             calc->init_func(alignment->region, user_data);
3220         }
3221     if(alignment->model->start_state->cell_start_func){
3222         start_cell = alignment->model->start_state->cell_start_func
3223             (query_pos, target_pos, user_data);
3224 #ifdef DEBUG
3225         g_print("USING start cell from (%d,%d): ",
3226                 query_pos, target_pos);
3227 #endif /* DEBUG */
3228         for(i = 0;
3229             i < (1 + alignment->model->total_shadow_designations); i++){
3230             cell[i] = start_cell[i];
3231 #ifdef DEBUG
3232             g_print("[%d]", cell[i]);
3233 #endif /* DEBUG */
3234             }
3235 #ifdef DEBUG
3236         g_print("\n");
3237 #endif /* DEBUG */
3238         }
3239     score = cell[0];
3240 #ifdef DEBUG
3241     g_message("start with [%d] (%s)", score,
3242             alignment->model->name);
3243 #endif /* DEBUG */
3244     for(i = 0; i < alignment->operation_list->len; i++){
3245         alignment_operation = alignment->operation_list->pdata[i];
3246         for(j = 0; j < alignment_operation->length; j++){
3247             state = alignment_operation->transition->input;
3248             for(k = 0; k < state->src_shadow_list->len; k++){
3249                 shadow = state->src_shadow_list->pdata[k];
3250 /* #define DEBUG_SHADOWS */
3251 #ifdef DEBUG_SHADOWS
3252                 g_message("start_func on [%s:%s:%s] (%d,%d) shadow(%d)",
3253                           state->name,
3254                           alignment_operation->transition->name,
3255                           shadow->name,
3256                           query_pos, target_pos, k);
3257 #endif /* DEBUG_SHADOWS */
3258                 cell[1 + shadow->designation] = shadow->start_func(
3259                   query_pos, target_pos, user_data);
3260                 }
3261             transition = alignment_operation->transition;
3262             for(k = 0; k < transition->dst_shadow_list->len; k++){
3263                 shadow = transition->dst_shadow_list->pdata[k];
3264 #ifdef DEBUG_SHADOWS
3265                 g_message("end_func on [%s:%s] (%d,%d) shadow(%d)=(%d)",
3266                           transition->name,
3267                           shadow->name,
3268                           query_pos, target_pos, k,
3269                           cell[1 + shadow->designation]);
3270 #endif /* DEBUG_SHADOWS */
3271                 shadow->end_func(cell[1 + shadow->designation],
3272                   query_pos, target_pos, user_data);
3273                 }
3274             /**/
3275             t = C4_Calc_score(alignment_operation->transition->calc,
3276                               query_pos, target_pos, user_data);
3277             score += t;
3278 #ifdef DEBUG
3279                 g_message("add on [%d] (%s) => [%d] at (%d,%d) {%d,%d}",
3280                     t, alignment_operation->transition->name,
3281                     score, query_pos, target_pos, i, j);
3282 #endif /* DEBUG */
3283             query_pos
3284                 += alignment_operation->transition->advance_query;
3285             target_pos
3286                 += alignment_operation->transition->advance_target;
3287             }
3288         }
3289     for(i = 0; i < alignment->model->calc_list->len; i++){
3290         calc = alignment->model->calc_list->pdata[i];
3291         if(calc && calc->exit_func)
3292             calc->exit_func(alignment->region, user_data);
3293         }
3294     if(alignment->model->exit_func)
3295         alignment->model->exit_func(alignment->region, user_data);
3296 #ifdef DEBUG
3297     g_message("end with [%d]", score);
3298 #endif /* DEBUG */
3299 #ifdef DEBUG
3300     g_message("CHECKIT: [%d-%d=%d],[%d] [%d-%d=%d],[%d]",
3301              query_pos,
3302              alignment->region->query_start,
3303              (query_pos-alignment->region->query_start),
3304              alignment->region->query_length,
3305               target_pos,
3306               alignment->region->target_start,
3307              (target_pos-alignment->region->target_start),
3308              alignment->region->target_length);
3309 #endif /* DEBUG */
3310     if(score != alignment->score) /* FIXME: temp */
3311         g_warning("Score difference DP[%d] TB[%d]",
3312                 alignment->score, score);
3313     g_assert((query_pos-alignment->region->query_start)
3314              == alignment->region->query_length);
3315     g_assert((target_pos-alignment->region->target_start)
3316              == alignment->region->target_length);
3317     g_assert(score == alignment->score);
3318     g_free(cell);
3319     return TRUE;
3320     }
3321 /* FIXME: Should change equality test in case the score is a float
3322  */
3323 #endif /* G_DISABLE_ASSERT */
3324 
Alignment_has_valid_path(Alignment * alignment)3325 static gboolean Alignment_has_valid_path(Alignment *alignment){
3326     register C4_State *first_state, *last_state;
3327     register AlignmentOperation *alignment_operation, *prev;
3328     register gint i;
3329     g_assert(alignment);
3330     g_assert(alignment->operation_list);
3331     g_assert(alignment->operation_list->len > 0);
3332     alignment_operation = alignment->operation_list->pdata[0];
3333     first_state = alignment_operation->transition->input;
3334     g_assert(first_state == alignment->model->start_state->state);
3335     alignment_operation = alignment->operation_list->pdata
3336                          [alignment->operation_list->len-1];
3337     last_state = alignment_operation->transition->output;
3338     g_assert(last_state == alignment->model->end_state->state);
3339     for(i = 1; i < alignment->operation_list->len; i++){
3340         alignment_operation = alignment->operation_list->pdata[i];
3341         prev = alignment->operation_list->pdata[i-1];
3342         g_assert(alignment_operation->transition->input
3343               == prev->transition->output);
3344         }
3345     return TRUE;
3346     }
3347 
Alignment_is_within_scope(Alignment * alignment,Region * seq_region)3348 static gboolean Alignment_is_within_scope(Alignment *alignment,
3349                                           Region *seq_region){
3350     register gboolean start_at_query_edge,
3351                       start_at_target_edge,
3352                       end_at_query_edge,
3353                       end_at_target_edge;
3354     start_at_query_edge
3355         = (seq_region->query_start == alignment->region->query_start);
3356     start_at_target_edge
3357         = (seq_region->target_start == alignment->region->target_start);
3358     end_at_query_edge = (Region_query_end(alignment->region)
3359                          == Region_query_end(seq_region));
3360     end_at_target_edge = (Region_target_end(alignment->region)
3361                          == Region_target_end(seq_region));
3362     switch(alignment->model->start_state->scope){
3363         case C4_Scope_ANYWHERE:
3364             break;
3365         case C4_Scope_EDGE:
3366             g_assert(start_at_query_edge || start_at_target_edge);
3367             break;
3368         case C4_Scope_QUERY:
3369             g_assert(start_at_query_edge);
3370             break;
3371         case C4_Scope_TARGET:
3372             g_assert(start_at_target_edge);
3373             break;
3374         case C4_Scope_CORNER:
3375             g_assert(start_at_query_edge && start_at_target_edge);
3376             break;
3377         }
3378     switch(alignment->model->end_state->scope){
3379         case C4_Scope_ANYWHERE:
3380             break;
3381         case C4_Scope_EDGE:
3382             g_assert(end_at_query_edge || end_at_target_edge);
3383             break;
3384         case C4_Scope_QUERY:
3385             g_assert(end_at_query_edge);
3386             break;
3387         case C4_Scope_TARGET:
3388             g_assert(end_at_target_edge);
3389             break;
3390         case C4_Scope_CORNER:
3391             g_assert(end_at_query_edge && end_at_target_edge);
3392             break;
3393         }
3394     return TRUE;
3395     }
3396 
Alignment_is_valid(Alignment * alignment,Region * seq_region,gpointer user_data)3397 gboolean Alignment_is_valid(Alignment *alignment, Region *seq_region,
3398                             gpointer user_data){
3399     g_assert(alignment);
3400     g_assert(seq_region);
3401     g_assert(Region_is_within(seq_region, alignment->region));
3402     g_assert(Alignment_is_within_scope(alignment, seq_region));
3403     g_assert(Alignment_has_valid_path(alignment));
3404     g_assert(Alignment_has_valid_alignment(alignment, user_data));
3405     return TRUE;
3406     }
3407 
3408 /**/
3409 
Alignment_import_derived(Alignment * alignment,Alignment * to_add,C4_DerivedModel * derived_model)3410 void Alignment_import_derived(Alignment *alignment,
3411                               Alignment *to_add,
3412                               C4_DerivedModel *derived_model){
3413     register gint i;
3414     register AlignmentOperation *alignment_operation;
3415     register C4_Transition *transition;
3416     g_assert(alignment->model == derived_model->original);
3417     g_assert(to_add->model == derived_model->derived);
3418     for(i = 0; i < to_add->operation_list->len; i++){
3419         alignment_operation = to_add->operation_list->pdata[i];
3420         transition = derived_model->transition_map
3421                                [alignment_operation->transition->id];
3422         Alignment_add(alignment, transition,
3423                       alignment_operation->length);
3424         }
3425     return;
3426     }
3427 
3428