1 /* @source edialign application
2 **
3 ** Local multiple alignment
4 **
5 ** @author Burkhard Morgenstern and Said Abdeddaim
6 ** @modified Alan Bleasby (ajb@ebi.ac.uk) EMBOSS port based on ACD
7 ** from Guy Bottu
8 ** @@
9 **
10 ** This program is free software; you can redistribute it and/or
11 ** modify it under the terms of the GNU General Public License
12 ** as published by the Free Software Foundation; either version 2
13 ** of the License, or (at your option) any later version.
14 **
15 ** This program is distributed in the hope that it will be useful,
16 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ** GNU General Public License for more details.
19 **
20 ** You should have received a copy of the GNU General Public License
21 ** along with this program; if not, write to the Free Software
22 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 ******************************************************************************/
24
25 #include "emboss.h"
26 #include <math.h>
27
28
/* Smaller / larger of two values.  These are function-like macros, so an
   argument may be evaluated more than once: do not pass expressions with
   side effects. */
#define edmin(a,b) (((b) > (a)) ? (a) : (b))
#define edmax(a,b) (((b) < (a)) ? (a) : (b))
31
/* ---- Size limits for buffers and tables ---- */

#define TAILLE_MAX_LIGNE_FICHIER 10000  /* French: maximum file line size */

#define PAPER_WIDTH       80
#define MLINE             1000
#define MAX_REGEX         1000   /* size of the motif tables and regex buffer */
#define NAME_LEN          1000   /* size of the name/string buffers below */
#define SEQ_NAME_LEN      12     /* width of the padded short sequence name */
#define MAX_SEQNUM        10000  /* size of the newseq[] table */
#define MAX_ITNUM         3      /* initial value of max_itnum */
#define MAX_INPUT_LINE    10000
#define MIN_MOT_WGT       0.1
#define MAX_CSC           10

/* ---- Default parameters ---- */

#define BETA                  0
#define WEB                   0
#define OVERLAP_THRESHOLD     35        /* main() switches overlap weights off
                                           above this many sequences unless
                                           ow_force is set */
#define MIN_DIA               1         /* initial value of min_dia */
#define MAX_DIA               40        /* initial value of max_dia and lmax */
#define MATNAME               "BLOSUM"  /* copied into mat_name by main() */
#define WEAK_WGT_TYPE_THR     0.5
#define STRONG_WGT_TYPE_THR   0.75

/* ---- Fixed-size type aliases ----
   NOTE(review): outside __alpha, INT32 is `long int', which is wider than
   32 bits on LP64 platforms - confirm nothing relies on an exact width. */

#define INT16   short int
#if defined(__alpha)
#define INT32   int
#else
#define INT32   long int
#endif
#define REAL32  float
#define REAL64  double

/*
#ifndef MAC
#define Malloc malloc
#define Calloc calloc
#define Realloc realloc
#define Free free
#endif
*/

#define STATUS 0x10000002
78
79
80 /* Globals */
81 FILE *fp_dia, *fp_dpa, *fp_frg , *fp_mot ;
82 struct multi_frag *anchor_frg ;
83
84 ajint col_score = 0;
85 ajint char_num[ MAX_REGEX ] ;
86 char *mot_char[ MAX_REGEX ] ;
87 ajint regex_len , mot_len = 0 ;
88
89
90 clock_t beg_pa , end_pa , beg_ali , end_ali , beg_ts , end_ts ;
91 float time_diff_pa , time_diff_ali , perc_pa_time , time_diff_srt ;
92 float total_pa_time = 0 ;
93
94
95 float mot_factor , mot_offset_factor , max_mot_offset ;
96
97 ajint wgt_type_plot = 0 , motifs = 0 ;
98 ajint bubblesort = 0 , cd_gobics = 0 ;
99 ajint nas = 0 , ref_seq = 0;
100 ajuint i_max ;
101 ajint speed_optimized = 0 ;
102 ajint online = 0 ;
103 ajint time_stamps = 0 ;
104 ajint break1 = 0 ;
105 ajint break2 = 0 ;
106 ajint wgt_print = 0 ;
107 ajint wgt_print_x = 0 ;
108 short max_itnum = MAX_ITNUM ;
109 ajint quali_num = 1 ;
110 ajint wgt_plot = 0 ;
111 ajint self_comparison = 0;
112 short exclude_frg = 0;
113 ajint ***gl_exclude_list ;
114 ajint max_sim_score = -2000 ;
115 ajint sf_mat = 0 ;
116 char nuc1, nuc2, nuc3 ;
117 short crick_strand = 0;
118 ajint gl_frg_count = 0;
119 ajint dna_speed = 0;
120 char pst_name[NAME_LEN];
121 ajint cont_it = 1 , wgt_type = 0 ;
122 ajint mask = 0, strict = 0 , textual_alignment = 1;
123 char prn[ NAME_LEN ] ;
124 ajint redundant, print_max_nd = 1;
125 ajint lmax = MAX_DIA;
126 char **arguments;
127 ajint pr_av_nd = 0, pr_av_max_nd ;
128 char input_line[ NAME_LEN ];
129 char input_parameters[ NAME_LEN ];
130 ajint print_status = 0 ;
131 char clust_sim[NAME_LEN] ;
132 float tot_weight = 0, av_len;
133 ajint anchors = 0;
134 ajint pa_only = 0;
135 ajint dia_num = 0;
136 ajint max_dia_num = 0;
137 float av_dia_num = 0;
138 float av_max_dia_num = 0;
139 ajint afc_file = 0;
140 ajint afc_filex = 0;
141 ajint dia_pa_file = 0;
142 ajint frag_file = 0;
143 ajuint argnum;
144 ajint standard_out = 0;
145 ajint plot_num = 4 ;
146 ajint default_name = 1;
147 ajint fasta_file = 0;
148 ajint cw_file = 0;
149 ajint msf_file = 0;
150 char *upg_str = NULL;
151 ajint dcount = 0;
152
153
154 ajint **shift = NULL;
155 ajint thr_sim_score = 4 ;
156
157 char **seq = NULL;
158 /*char *seq[MAX_SEQNUM];*/ /* sequences */
159
160
161 char *newseq[MAX_SEQNUM]; /* sequences */
162 ajint sim_score[21][21]; /* similarity matrix */
163 float av_sim_score_pep ;
164 float av_sim_score_nuc ;
165 float **glob_sim = NULL; /* overall similarity between any two sequences */
166 float **wgt_prot = NULL ; /* `weight' of diagonals */
167 float **wgt_dna = NULL ; /* `weight' of diagonals */
168 float **wgt_trans = NULL; /* `weight' of diagonals */
169 float **min_weight = NULL; /* `weight' of diagonals */
170 ajint min_dia = MIN_DIA ; /* minimum length of diagonals */
171 ajint max_dia = MAX_DIA ; /* maximum length of diagonals */
172 ajint iter_cond_prob = 0;
173 ajint *seqlen; /* lengths of sequences */
174
175 char **full_name = NULL;
176 /*char *full_name[MAX_SEQNUM] ;*/
177
178
179 float **pair_score = NULL;
180 short **cont_it_p = NULL;
181 float score;
182 ajint maxlen; /* maximum length of sequences */
183 ajuint seqnum; /* number of sequences */
184 ajint *num_dia_bf = NULL; /* num_dia_bf[ istep ] = number of diagonals from
185 all pairwise alignments BEFORE FILTER
186 PROCEDURE in iteration step `istep' */
187 ajint *num_dia_af = NULL; /* num_dia_af[istep] = number of diagonals from
188 all pairwise alignments AFTER FILTER
189 PROCEDURE in iteration step `it' */
190 ajint num_dia_anc; /* number of diagonals definde by anchored
191 regions */
192 ajint num_all_it_dia = 0; /* total number of diagonals in multiple alignment
193 in all iteration steps */
194 float weight_sum_bf; /* sum of weights of diagonals in multiple
195 alignment before filter procedure */
196 float weight_sum_af; /* sum of weights of diagonals in multiple
197 alignment after fliter procedure*/
198 float threshold = 0.0 ; /* threshold T */
199 ajuint num_dia_p; /* number of diagonals in pairwise alignment */
200 ajint long_output = 0; /* if long_output = 1, a log-file is produced. */
201 ajint frg_mult_file = 0 ;
202 ajint frg_mult_file_v = 0 ;
203 ajint overlap_weights = 1 ;
204 ajint ow_force = 0 ;
205 ajint anc_num = 0; /* number of anchored regions
206 (specified in file *.anc) */
207 ajint par_count; /* number of parameters */
208 float pairalignsum; /* sum of weights in pairwise alignment */
209 ajint pairalignlen; /* sum of aligned residues in pairwise alignment */
210 char amino_acid[22];
211 ajint istep;
212 struct multi_frag /* pointer to first diagonal in multiple alignment */
213 *this_it_dia; /* in current iteration step */
214 struct multi_frag /* pointer to first diagonal in multiple alignment */
215 *all_it_dia = NULL; /* in all iteration step */
216 struct multi_frag *end_dia;
217 /* pointer to last diagonal in multiple alignment */
218
219 char par_dir[NAME_LEN];
220
221 char **seq_name = NULL;
222
223 /*char *seq_name[MAX_SEQNUM];*/
224
225 char mat_name[NAME_LEN]; /* name of file containing similarity matrix */
226 char mat_name_p[NAME_LEN];
227 char anc_name[NAME_LEN]; /* anchored regions */
228 char seq_file[NAME_LEN];
229 char input_name[NAME_LEN];
230 char tmp_str[NAME_LEN];
231 char output_name[NAME_LEN];
232 char printname[NAME_LEN];
233 char gl_mot_regex[MAX_REGEX] ;
234
235 char *par_file;
236
237 short **mot_pos ; /* positions of pre-defined motifs */
238
239 ajint **amino = NULL; /* amino acid residues in protein sequences or
240 translated DNA sequences, respective */
241
242 ajint **amino_c = NULL; /* amino acid residues on crick strand */
243
244
245
246 ajint ***open_pos; /* open_pos[i][j][p] = 1, if the p-th residue of
247 sequence i is not yet directly (by one diagonal)
248 aligned with any residue of sequence j and
249 open_pos[i][j][r] = 0 otherwise. So, at the
250 beginning of the first iteration step, all values
251 are 1. In the subsequent iteration steps,
252 only those parts of the sequence are considered,
253 that are not yet aligned. */
254
255
256 struct multi_frag *pair_dia; /* diagonals in pairwise alignemnt */
257
258
259 double **tp400_prot =NULL; /* propability distribution for sums of similarity
260 socores in diagonals occurring in comparison matrix
261 (by random experiments and approximation */
262
263 double **tp400_dna =NULL; /* propability distribution for sums of similarity
264 socores in diagonals occurring in comparison matrix
265 (by random experiments and approximation */
266
267 double **tp400_trans =NULL; /* propability distribution for sums of similarity
268 socores in diagonals occurring in comparison matrix
269 (by random experiments and approximation */
270
271
272 char dia_pa_name[NAME_LEN];
273 char frag_file_name[NAME_LEN];
274 char mot_file_name[NAME_LEN];
275
276 ajint lgs_option = 0;
277 float sf_mat_thr = 0;
278
279
280
281
282 /* @datastatic edialignPositionSet ********************************************
283 **
284 ** Dialign positionset structure
285 **
286 ** @attr pos [ajint*] Positions array
287 ** @attr nbr [ajint] Size of position array
288 ** @attr Padding [char[4]] Padding to alignment boundary
289 ******************************************************************************/
290
291 typedef struct
292 {
293 ajint *pos;
294 ajint nbr;
295 char Padding[4];
296 } edialignPositionSet;
297
298
299
300
301 /* @datastatic edialignSequence ***********************************************
302 **
303 ** Dialign sequence structure
304 **
305 ** @attr longueur [ajint] Length
306 ** @attr Padding [char[4]] Padding to alignment boundary
307 ** @attr aligSetNbr [ajint*] Numbers of sets
308 ** @attr predAligSetPos [ajint*] Predicted alignment set positions
309 ** @attr succAligSetPos [ajint*] Successful alignment set positions
310 ******************************************************************************/
311
312
313 typedef struct {
314 ajint longueur;
315 char Padding[4];
316 ajint *aligSetNbr;
317 ajint *predAligSetPos;
318 ajint *succAligSetPos;
319 } edialignSequence;
320
321
322
323
324 /* @datastatic edialignCLOSURE ************************************************
325 **
326 ** Dialign closure structure
327 **
328 ** @attr seq [edialignSequence*] Sequences
329 ** @attr seqNbr [ajint] Numbers of sequences
330 ** @attr maxLong [ajint] Maximum long
331 ** @attr aligSet [edialignPositionSet*] Alignment sets
332 ** @attr nbrAligSets [ajint] Number of alignment sets
333 ** @attr oldNbrAligSets [ajint] Old number of alignment sets
334 ** @attr predFrontier [ajint**] Predicted boundaries
335 ** @attr succFrontier [ajint**] Successful boundaries
336 ** @attr topolog [ajint*] Topologies
337 ** @attr gauche1 [ajint*] Left end in 1
338 ** @attr gauche2 [ajint*] Left end in 2
339 ** @attr droite1 [ajint*] Right end in 1
340 ** @attr droite2 [ajint*] Right end in 2
341 ** @attr pos_ [ajint**] Positions
342 ******************************************************************************/
343
344
345 typedef struct {
346
347 edialignSequence *seq;
348 ajint seqNbr;
349 ajint maxLong;
350
351 edialignPositionSet *aligSet;
352 ajint nbrAligSets;
353 ajint oldNbrAligSets;
354
355 ajint **predFrontier;
356 ajint **succFrontier;
357
358 ajint *topolog;
359 ajint *gauche1;
360 ajint *gauche2;
361 ajint *droite1;
362 ajint *droite2;
363 ajint **pos_;
364
365 } edialignCLOSURE;
366
367
368
369
370 /*
371 fragments in function `pairalign'
372
373 b1, b2: begin of the diagonal
374 ext: length of the diagonal
375 weight: weight of the diagonal
376 prec: preceding diagonal in dot matrix
377 last: last diagonal ending in the same column
378 sum: sum of weights accumulated
379 cs: crick strand
380 trans: translation
381 */
382
383 struct pair_frag
384 {
385 ajint b1;
386 ajint b2;
387 ajint ext;
388 float weight;
389 struct pair_frag *prec;
390 struct pair_frag *last;
391 float sum;
392 short trans;
393 short cs;
394 };
395
396
397
398
399 /*
400 fragments outside function `pairalign'
401
402 b[0], b[1]: begin of the diagonal
403 s[0], s[1]: sequences, to which diagonal belongs
404 ext: length of the diagonal
405 weight: individual weight of the diagonal
406 ow: overlap weight of the diagonal
407 sel: 1, if accepted in filter proces, 0 else
408 trans: translation
409 cs: crick strand
410 Padding: padding to alignment boundary
411 it: iteration step
412 *next: next diagonal
413 */
414
415 struct multi_frag
416 {
417 ajint b[2];
418 ajint s[2];
419 ajint ext;
420 ajint it;
421 float weight;
422 float ow;
423 short sel;
424 short trans;
425 short cs;
426 short Padding;
427 struct multi_frag *next;
428 struct multi_frag *pred;
429 };
430
431
432
433
434 struct leaf
435 {
436 ajint s1;
437 ajint s2;
438 ajint clade;
439 };
440
441
442
443
444 struct seq_pair
445 {
446 ajint s1;
447 ajint s2;
448 float weight;
449 };
450
451
452
453
454 struct subtree
455 {
456 ajint member_num;
457 ajint valid;
458 ajint *member;
459 char *name;
460 float depth;
461 char Padding[4];
462 };
463
464
465
466
467 char DEBUG=0;
468 edialignCLOSURE *gabiosclos; /* closure data structure for GABIOS-LIB */
469
470
471
472 static void **edialign_callouer_mat(size_t t_elt, size_t nb_lig,
473 size_t nb_col);
474 static void *edialign_allouer(size_t taille);
475 static void *edialign_reallouer(void *pointeur, size_t taille);
476 static void edialign_liberer(void *pointeur);
477 static void edialign_liberer_mat(void **pointeur, size_t nb_lig);
478 static void edialign_realloc_closure(edialignCLOSURE *clos);
479 static void edialign_free_closure(edialignCLOSURE *clos);
480 static void edialign_freeAligGraphClosure(edialignCLOSURE *clos);
481 static void **edialign_recallouer_mat(void **pointeur, size_t t_elt,
482 size_t anc_nb_lig,
483 size_t nb_lig, size_t nb_col);
484 static void edialign_desinit_seq(edialignCLOSURE *clos);
485 static void edialign_erreur(const char *message);
486 static ajint edialign_word_count(char *str);
487 static void edialign_rel_wgt_calc(ajint l1, ajint l2, float **rel_wgt);
488 static void edialign_wgt_prnt_prot(void);
489 static ajint edialign_mini2(ajint a, ajint b);
490 static ajuint edialign_minu2(ajint a, ajint b);
491 static ajint edialign_mini3(ajint a, ajint b, ajint c);
492 static float edialign_mot_dist_factor(ajint offset , float parameter);
493 static float edialign_maxf2(float a, float b);
494 static void edialign_wgt_prnt(void);
495 static void edialign_regex_parse(char *mot_regex);
496 static void edialign_seq_parse(char *mot_regex_unused);
497 static void edialign_seq_shift(void);
498 static void edialign_matrix_read(FILE *fp_mat);
499 static void edialign_mem_alloc(void);
500 static ajint edialign_multi_anc_read(char *file_name);
501 static void edialign_exclude_frg_read( char *file_name , int ***exclude_list);
502 static void edialign_tp400_read(ajint w_type, double **pr_ptr);
503 static edialignCLOSURE *edialign_newAligGraphClosure(ajint nbreseq,
504 ajint *longseq,
505 ajint nbreancr,
506 ajint **ancrages);
507 static ajint edialign_translate(char c1, char c2 ,char c3, ajint seqno,
508 ajint pos);
509
510 static char edialign_invert(char c1);
511 static void edialign_ow_bubble_sort( int number , struct multi_frag *dp );
512 static void edialign_frag_sort(ajint number , struct multi_frag *dp ,
513 ajint olw );
514 static void edialign_filter(ajint *number, struct multi_frag *diagonal);
515 static void edialign_para_print( char *s_f, FILE *fpi );
516
517 static float edialign_frag_chain(ajint n1, ajint n2, FILE *fp1, FILE *fp_m,
518 ajuint *number);
519 static void edialign_ow_add( struct multi_frag *sm1 , struct multi_frag *sm2 );
520 static void edialign_print_log(struct multi_frag *d,FILE *fp_l,FILE *fp_fs);
521 static void edialign_print_fragments(struct multi_frag *d , FILE *fp_ff2 );
522 static void edialign_throw_out( float *weight_sum );
523 static void edialign_sel_test(void);
524 static void edialign_av_tree_print(void);
525 static void edialign_subst_mat( char *file_name, int fragno ,
526 struct multi_frag *frg );
527 static void edialign_ali_arrange(ajint ifragno , struct multi_frag *d,
528 FILE *fp, AjPSeqout seqout, FILE *fp3 ,
529 FILE *fp4 ,
530 FILE *fp_col_score,AjBool isprot);
531 static void edialign_bubble_sort(ajint number, struct multi_frag *dp);
532
533
534
535
536 #if 0
537 static void regex_format_complain(void);
538 #endif
539
540
541
542
543 /* @prog edialign *************************************************************
544 **
545 ** Local multiple alignment
546 **
547 ******************************************************************************/
548
main(int argc,char ** argv)549 int main(int argc, char **argv)
550 {
551 ajuint k;
552 ajint dia_counter;
553 ajint tmpi1;
554 ajint tmpi2 ;
555
556 struct multi_frag *current_dia;
557 struct multi_frag *diagonal1;
558 struct multi_frag *diagonal2;
559
560 /* pointers to diagonals in multiple alignment */
561
562 char str[NAME_LEN], dist_name[NAME_LEN];
563 char par_str[NAME_LEN];
564 char *char_ptr;
565 char prn2[NAME_LEN];
566 char logname[NAME_LEN];
567 char fsm_name[NAME_LEN];
568 char dia_name[NAME_LEN];
569 char csc_name[NAME_LEN];
570 char itname[NAME_LEN], itname2[NAME_LEN], itname3[NAME_LEN];
571 char itname4[NAME_LEN];
572 char dialign_dir[NAME_LEN];
573
574 ajuint i;
575 ajuint j;
576 ajuint n;
577 ajint len;
578
579 ajuint hv;
580
581 ajint ii;
582
583 FILE *fp_ali = NULL;
584 /* FILE *fp2 = NULL;*/
585 FILE *fp3 = NULL;
586 FILE *fp4 = NULL;
587 FILE *fp_log = NULL;
588 FILE *fp_fsm = NULL;
589 FILE *fp_st;
590 FILE *fp_csc = NULL;
591 FILE *fp_matrix = NULL; /* file containing similarity matrix */
592
593 AjPFile matfp = NULL;
594 AjPSeqset seqset = NULL;
595 const AjPSeq pseq;
596 const AjPStr sname = NULL;
597 AjPStr nucmode;
598 AjBool revcomp;
599 AjPStr overlapw;
600 AjPStr linkage;
601 AjPFile outfile = NULL;
602 AjBool isprot;
603 ajuint s_len;
604 char s_str[SEQ_NAME_LEN+3];
605 AjPSeqout seqout = NULL;
606 AjPStr tnstr = NULL;
607
608 struct multi_frag *next_dia;
609
610 embInit("edialign", argc, argv);
611
612 tnstr = ajStrNew();
613
614 seqset = ajAcdGetSeqset("sequences");
615 nucmode = ajAcdGetListSingle("nucmode");
616 revcomp = ajAcdGetBoolean("revcomp");
617 overlapw = ajAcdGetSelectSingle("overlapw");
618 linkage = ajAcdGetListSingle("linkage");
619 lmax = ajAcdGetInt("maxfragl");
620 dna_speed = !!ajAcdGetBoolean("fragmat");
621
622 thr_sim_score = ajAcdGetInt("fragsim");
623 iter_cond_prob = !!ajAcdGetBoolean("itscore");
624
625 threshold = ajAcdGetFloat("threshold");
626 mask = !!ajAcdGetBoolean("mask");
627 plot_num = ajAcdGetInt("starnum");
628
629 quali_num = !ajAcdGetBoolean("dostars");
630
631 outfile = ajAcdGetOutfile("outfile");
632 seqout = ajAcdGetSeqoutall("outseq");
633
634 ajSeqsetFmtUpper(seqset);
635 n = ajSeqsetGetSize(seqset);
636
637 max_dia = lmax;
638
639 AJCNEW0(seq,n);
640 AJCNEW0(seq_name,n);
641 AJCNEW0(full_name,n);
642 AJCNEW0(seqlen,n+1);
643
644 isprot = ajFalse;
645
646 for(i=0;i<n;++i)
647 {
648 pseq = ajSeqsetGetseqSeq(seqset,i);
649 len = ajSeqGetLen(pseq);
650 seqlen[i] = len;
651 if(ajSeqIsProt(pseq))
652 isprot = ajTrue;
653
654 AJCNEW(seq[i],len+2); /* room to shift */
655 strcpy(seq[i],ajSeqGetSeqC(pseq));
656
657 sname = ajSeqsetGetseqNameS(seqset,i);
658 len = ajStrGetLen(sname);
659 AJCNEW(seq_name[i],SEQ_NAME_LEN+3);
660 AJCNEW(full_name[i],len+1);
661
662 s_len = ajStrGetLen(sname);
663 for(j=0; j < SEQ_NAME_LEN; ++j)
664 if(j < s_len)
665 s_str[j] = *(ajStrGetPtr(sname) + j);
666 else
667 s_str[j] = ' ';
668
669 s_str[ SEQ_NAME_LEN ] = '\0';
670
671
672
673
674 strcpy(seq_name[i],s_str);
675 strcpy(full_name[i],ajStrGetPtr(sname));
676 }
677
678 seqnum = n;
679
680 if(!isprot)
681 {
682 if(ajStrMatchC(nucmode,"n"))
683 wgt_type = 1;
684 else if(ajStrMatchC(nucmode,"nt"))
685 wgt_type = 2;
686 else if(ajStrMatchC(nucmode,"ma"))
687 wgt_type = 3;
688 }
689
690
691 crick_strand = !!revcomp;
692
693 if(ajStrMatchC(overlapw,"yes"))
694 overlap_weights = 0;
695 else if(ajStrMatchC(overlapw,"no"))
696 ow_force = 1;
697
698 if(ajStrMatchC(linkage,"max"))
699 strcpy(clust_sim,"max");
700 else if(ajStrMatchC(linkage,"min"))
701 strcpy(clust_sim,"min");
702 else if(ajStrMatchC(linkage,"UPGMA"))
703 strcpy(clust_sim, "av");
704
705 matfp = ajDatafileNewInNameC("edialignmat");
706
707 if(!matfp)
708 ajFatal("edialignmat matrix file not found\n");
709
710
711 strcpy(mat_name,MATNAME);
712
713
714 par_file = (char *) calloc((size_t) NAME_LEN , sizeof(char) );
715
716
717 if( time_stamps )
718 beg_ali = clock() ;
719
720 strcpy ( dialign_dir , "DIALIGN2_DIR" );
721
722 /*
723 if ((par_file = getenv(dialign_dir)) == NULL)
724 {
725 printf("\n \n \n Please set the environmentvariable "
726 "DIALIGN2_DIR \n");
727 printf(" as described in the README file \n");
728 embExitBad();
729 }
730 */
731 argnum = argc;
732 /*
733 strcpy( par_dir , par_file );
734 */
735
736
737 /*
738 if(argc == 1)
739 {
740 printf("\n usage: %s [ options ] <seq_file> \n\n", argv[0] );
741 printf(" <seq_file> contains input sequences in FASTA format.\n");
742 printf(" Per default, sequences are assumed to be protein "
743 "sequences.\n" ) ;
744 printf(" For DNA alignment, please use one of these options: \n\n");
745 printf(" -n DNA sequences; similarity calculated at the "
746 "nucleotide level \n\n");
747 printf(" -nt DNA sequences; similarity calculated at the "
748 "peptide level\n");
749 printf(" (by translation using the genetic code) \n\n");
750 printf(" -lgs long genomic sequences: Both nucleotide and "
751 "peptide\n");
752 printf(" similarities calculated \n\n");
753 printf(" Many more options are available, please consult the \n");
754 printf(" DIALIGN USER_GUIDE that should come with the DIALIGN "
755 "package.\n");
756 printf(" For more information on DIALIGN, please visit the "
757 "DIALIGN\n");
758 printf(" home page at BiBiServ (Bielefeld Bioinformatic "
759 "Server): \n\n") ;
760 printf(" http://bibiserv.techfak.uni-bielefeld.de/"
761 "dialign/ \n\n");
762 embExitBad() ;
763 }
764 */
765
766 arguments = ( char ** ) calloc( argnum , sizeof ( char * ) );
767
768 for( i = 0 ; i < argnum ; i++ )
769 {
770 arguments[i] = ( char *) calloc( NAME_LEN , sizeof (char) );
771 strcpy( arguments[i] , argv[i] );
772 }
773
774
775
776 sname = ajSeqsetGetFilename(seqset);
777
778 strcpy( input_name , ajStrGetPtr(sname));
779
780 threshold = 0.0 ;
781
782
783 /*
784 edialign_para_read( argnum , arguments );
785 */
786
787 if( ( textual_alignment == 0 ) && ( col_score == 1 ) )
788 {
789 printf("\n\n Option -csc makes sense only if \"textual alignment\"");
790 printf(" is produced. \n");
791 printf(" This can be enforced with option -ta \n\n");
792 printf(" program terminated \n\n\n");
793 embExitBad() ;
794 }
795
796
797 if( cd_gobics )
798 {
799 strcpy( input_line , "program parameters: " ) ;
800 for( i = 1 ; i < ( argnum -1 ) ; i++ ) {
801 strcat( input_line , argv[i] );
802 strcat( input_line , " " );
803 }
804 }
805 else
806 {
807 strcpy( input_line , "program call: " ) ;
808 for( i = 0 ; i < argnum ; i++ ) {
809 strcat( input_line , argv[i] );
810 strcat( input_line , " " );
811 }
812 }
813
814
815 if ( wgt_type > 0 )
816 strict = 1 ;
817
818 strcpy( seq_file , input_name );
819
820 if(
821 ( ! strcmp( input_name + strlen( input_name ) - 4 , ".seq" ) )
822 || ( ! strcmp( input_name + strlen( input_name ) - 3 , ".fa" ) )
823 || ( ! strcmp( input_name + strlen( input_name ) - 6 , ".fasta" ) )
824 )
825 if( ( char_ptr = strrchr(input_name,'.') ) != NULL)
826 *char_ptr = '\0';
827
828
829 strcpy( anc_name , input_name );
830 strcat( anc_name , ".anc" );
831 /*
832 seqnum = edialign_seq_read( seq_file , seq , seq_name , full_name ) ;
833 */
834 if ( motifs )
835 edialign_regex_parse( gl_mot_regex ) ;
836
837
838 if( ( seqnum == 2 ) && ( iter_cond_prob == 0 ) )
839 max_itnum = 1 ;
840
841
842 if( ( ow_force == 0 ) && ( seqnum > OVERLAP_THRESHOLD ) )
843 overlap_weights = 0;
844 if( seqnum == 2 )
845 overlap_weights = 0;
846
847 if( seqnum < 2 )
848 {
849 if( cd_gobics )
850 {
851 printf("\n\n Something is wrong with your sequence file. "
852 "Maybe you entered a\n");
853 printf(" MS WORD or RFT file or your file contains only "
854 "one single sequence.\n");
855 printf(" Please note that our server only accepts plain "
856 "text files. \n\n");
857 printf(" For more information, please consult our online "
858 "manual \n");
859 printf(" at the CHAOS/DIALIGN home page:\n\n");
860 printf(" http://dialign.gobics.de/"
861 "chaos-dialign-manual");
862 }
863
864 else
865 {
866 ajFatal("This application requires more than one sequence\n");
867 /*
868 printf("\n\n Your sequence file containes only a single "
869 "sequence.\n");
870 printf(" Please make sure your input file contains at "
871 "least two sequences.\n\n");
872 printf(" For more information, please consult the online "
873 "manual \n");
874 printf(" at the DIALIGN home page: \n\n");
875 printf(" http://bibiserv.techfak.uni-bielefeld.de/"
876 "dialign/manual.html ");
877 */
878 }
879 /*
880
881
882 printf("\n \n \n \n \n");
883 embExitBad();
884 */
885 }
886
887 maxlen = 0;
888
889
890
891 if( (pair_score = (float **) calloc( seqnum , sizeof(float *) )) == NULL)
892 {
893 printf(" problems with memory allocation for `pair_score' ! \n \n");
894 embExitBad();
895 }
896
897 for(i=0;i<seqnum;i++)
898 if( (pair_score[i] = (float *) calloc( seqnum ,
899 sizeof(float) )) == NULL)
900 {
901 printf(" problems with memory allocation for "
902 "`pair_score' ! \n \n");
903 embExitBad();
904 }
905
906 if(( cont_it_p = (short **) calloc( seqnum , sizeof( short *))) == NULL )
907 {
908 printf(" problems with memory allocation for `cont_it_p ' ! \n \n");
909 embExitBad();
910 }
911
912 for( i = 0 ; i < seqnum ; i++ )
913 if( (cont_it_p[i] = (short *) calloc( seqnum ,
914 sizeof(short) )) == NULL)
915 {
916 printf(" problems with memory allocation for "
917 "`cont_it_p' ! \n \n");
918 embExitBad();
919 }
920
921 for( i = 0 ; i < seqnum ; i++ )
922 for( j = 0 ; j < seqnum ; j++ )
923 cont_it_p[i][j] = 1 ;
924
925
926
927 for( i = 0 ; i < seqnum ; i++ )
928 {
929 av_len = av_len + seqlen[i];
930
931 if( seqlen[i] == 0 )
932 {
933 printf("\n \n \n WARNING: \n \n");
934 printf(" Sequence %u contains no residues.\n",i+1);
935 printf(" Please inspect the sequence file.\n \n ");
936 printf("\n \n Program terminated \n \n \n " );
937
938 embExitBad();
939 }
940
941 if(maxlen < seqlen[i])
942 maxlen = seqlen[i];
943 }
944
945
946 av_len = av_len / seqnum;
947
948 if ( motifs )
949 edialign_seq_parse( gl_mot_regex ) ;
950
951 edialign_seq_shift();
952
953
954 if( (glob_sim =
955 (float **) calloc( seqnum , sizeof(float*))) == NULL)
956 {
957 printf("Problems with memory allocation for glob_sim\n");
958 embExitBad();
959 }
960
961 for(i=0;i<seqnum;i++)
962 {
963
964 if( (glob_sim[i] =
965 (float *) calloc( seqnum , sizeof(float))) == NULL)
966 {
967 printf("Problems with memory allocation for glob_sim \n");
968 embExitBad();
969 }
970
971 }
972
973 strcpy(par_str,"sdfsdf");
974
975 /*
976 if( argc > 1 )
977 {
978 strcpy(str,par_dir);
979 strcat(str,"/");
980 strcat(str,mat_name);
981 strcpy(mat_name_p,str);
982
983 if( (fp_matrix = fopen(mat_name_p, "r")) == NULL)
984 {
985
986
987 printf("\n\n Cannot find the file %s \n\n", mat_name );
988 printf(" Make sure the environment variable DIALIGN2_DIR "
989 "points\n");
990 printf(" to a directory containing the files \n\n");
991 printf(" BLOSUM \n tp400_dna\n tp400_prot \n "
992 "tp400_trans \n\n" );
993 printf(" These files should be contained in the DIALIGN "
994 "package \n\n\n" ) ;
995 embExitBad() ;
996
997
998
999
1000 printf("\n \n \n \n ATTENTION ! \n \n");
1001 printf("\n There is no similarity matrix `%s'. \n", mat_name);
1002 printf(" in the directory \n \n");
1003 printf(" %s\n \n", par_dir);
1004 embExitBad();
1005 }
1006 }
1007 */
1008
1009 /*
1010 ** This section had to be used if the VC++ /MT libs were used instead of
1011 ** the /MD ones
1012 ajStrAssignS(&tnstr,ajFileGetPrintnameS(matfp));
1013 ajFileClose(&matfp);
1014
1015 fp_matrix = fopen(ajStrGetPtr(tnstr),"rb");
1016 */
1017
1018 fp_matrix = ajFileGetFileptr(matfp);
1019
1020 if( wgt_type != 1 )
1021 edialign_matrix_read( fp_matrix );
1022
1023 ajFileClose(&matfp);
1024
1025 edialign_mem_alloc( );
1026
1027
1028 if( wgt_type != 1 )
1029 if( (amino = (int **) calloc( seqnum , sizeof(int *) ) ) == NULL)
1030 {
1031 printf(" problems with memory allocation");
1032 printf(" for `amino' ! \n \n");
1033 embExitBad();
1034 }
1035
1036 if( wgt_type != 1 )
1037 for( i = 0 ; i < seqnum ; i++ )
1038 if((amino[i] = (int *) calloc((seqlen[i]+5), sizeof(int))) == NULL)
1039 {
1040 printf(" problems with memory allocation");
1041 printf(" for `amino[%u]' ! \n \n", i);
1042 embExitBad();
1043 }
1044
1045
1046 if( crick_strand )
1047 {
1048 if( (amino_c = (int **) calloc( seqnum , sizeof(int *) ) ) == NULL)
1049 {
1050 printf(" problems with memory allocation");
1051 printf(" for `amino_c' ! \n \n");
1052 embExitBad();
1053 }
1054
1055 for( i = 0 ; i < seqnum ; i++ )
1056 if( (amino_c[i] = (int *) calloc((seqlen[i]+5 ),
1057 sizeof(int) ) ) == NULL)
1058 {
1059 printf(" problems with memory allocation");
1060 printf(" for `amino_c[%u]' ! \n \n", i);
1061 embExitBad();
1062 }
1063 }
1064
1065
1066 /******************************************************
1067 * *
1068 * read file, that contains data of anchored regions *
1069 * *
1070 ******************************************************/
1071
1072
1073
1074 if( anchors )
1075 {
1076 edialign_multi_anc_read( input_name );
1077 }
1078
1079 if( exclude_frg )
1080 {
1081 if((gl_exclude_list = (int ***) calloc(seqnum,
1082 sizeof(int **) )) == NULL)
1083 {
1084 printf(" problems with memory allocation for "
1085 "'exclude_list' \n \n");
1086 embExitBad();
1087 }
1088
1089 for(i = 0 ; i < seqnum ; i++ )
1090 if( ( gl_exclude_list[ i ] =
1091 (int **) calloc( seqnum , sizeof(int *) )) == NULL)
1092 {
1093 printf(" problems with memory allocation for "
1094 "'gl_exclude_list' \n \n");
1095 embExitBad();
1096 }
1097
1098 for(i = 0 ; i < seqnum ; i++ )
1099 for(j = 0 ; j < seqnum ; j++ )
1100 if( ( gl_exclude_list[ i ][ j ] =
1101 (int *) calloc( seqlen[ i ] + 1 , sizeof(int) )) == NULL)
1102 {
1103 printf(" problems with memory allocation for "
1104 "'gl_exclude_list' \n \n");
1105 embExitBad();
1106 }
1107
1108 edialign_exclude_frg_read ( input_name , gl_exclude_list ) ;
1109 }
1110
1111
1112 if( wgt_type == 0 )
1113 edialign_tp400_read( 0 , tp400_prot);
1114 if( wgt_type % 2 )
1115 edialign_tp400_read( 1 , tp400_dna );
1116 if( wgt_type > 1 )
1117 edialign_tp400_read( 2 , tp400_trans );
1118
1119 /****************************\
1120 * *
1121 * Name of output files *
1122 * *
1123 \****************************/
1124
1125 if( default_name )
1126 {
1127 strcpy( printname , input_name);
1128 strcpy( prn , printname);
1129 }
1130 else
1131 {
1132 strcpy( printname , output_name );
1133 strcpy( prn , printname);
1134 }
1135
1136
1137 strcpy(prn2 , prn);
1138
1139 if( default_name )
1140 strcat(prn,".ali");
1141
1142 strcat(prn2,".fa");
1143
1144
1145
1146 strcpy(logname,printname);
1147 strcat(logname,".log");
1148
1149 strcpy(fsm_name , printname);
1150 strcat(fsm_name,".fsm");
1151
1152 if( print_status )
1153 {
1154 strcpy( pst_name , printname );
1155 strcat( pst_name,".sta");
1156 }
1157
1158 if( afc_file )
1159 {
1160 strcpy( dia_name , printname );
1161 strcat( dia_name , ".afc" );
1162 fp_dia = fopen( dia_name , "w" );
1163 fprintf(fp_dia,"\n # %s \n\n seq_len: " , input_line );
1164 for( i = 0 ; i < seqnum ; i++ )
1165 fprintf(fp_dia," %d ", seqlen[i] );
1166 fprintf(fp_dia,"\n\n");
1167
1168 }
1169
1170 if( col_score )
1171 {
1172 strcpy( csc_name , printname );
1173 strcat( csc_name , ".csc" );
1174 fp_csc = fopen( csc_name , "w" );
1175 }
1176
1177 if( dia_pa_file )
1178 {
1179 strcpy( dia_pa_name , printname );
1180 strcat( dia_pa_name , ".fop" );
1181
1182 fp_dpa = fopen( dia_pa_name , "w" );
1183
1184
1185 fprintf(fp_dpa,"\n # %s \n\n seq_len: " , input_line );
1186 for( i = 0 ; i < seqnum ; i++ )
1187 fprintf(fp_dpa," %d ", seqlen[i] );
1188 fprintf(fp_dpa,"\n\n");
1189 fclose( fp_dpa ) ;
1190 }
1191
1192
1193 if( motifs )
1194 {
1195 strcpy( mot_file_name , printname );
1196 strcat( mot_file_name , ".mot" );
1197 fp_mot = fopen( mot_file_name , "w" );
1198
1199 fprintf(fp_mot,"\n # %s \n\n " , input_line );
1200 fprintf(fp_mot," motif: %s \n\n", gl_mot_regex );
1201 fprintf(fp_mot," max offset for motifs = %d \n\n",
1202 (int) max_mot_offset );
1203 fprintf(fp_mot," the following fragments contain the motif: \n\n" );
1204 fprintf(fp_mot," seq1 seq2 beg1 beg1 len wgt" );
1205 fprintf(fp_mot," # mot mot_wgt \n\n" );
1206 }
1207
1208
1209 if( frag_file )
1210 {
1211 strcpy( frag_file_name , printname );
1212 strcat( frag_file_name , ".frg" );
1213 fp_frg = fopen( frag_file_name , "w" );
1214
1215 fprintf(fp_frg,"\n # %s \n\n seq_len: " , input_line );
1216 for( i = 0 ; i < seqnum ; i++ )
1217 fprintf(fp_frg," %d ", seqlen[i] );
1218 fprintf(fp_frg,"\n sequences: " );
1219 for( i = 0 ; i < seqnum ; i++ )
1220 fprintf(fp_frg," %s ", seq_name[i] );
1221
1222 fprintf(fp_frg ,"\n\n");
1223 }
1224
1225
1226
1227 gabiosclos = edialign_newAligGraphClosure(seqnum, seqlen, 0, NULL);
1228
1229 if( (open_pos = (int *** ) calloc( seqnum , sizeof(int **))) == NULL)
1230 {
1231 printf("Problems with memory allocation for open_pos\n");
1232 embExitBad();
1233 }
1234
1235 for(i=0;i<seqnum;i++)
1236 {
1237 if( (open_pos[i] =
1238 (int ** ) calloc( seqnum , sizeof(int *))) == NULL)
1239 {
1240 printf("Problems with memory allocation for open_pos\n");
1241 embExitBad();
1242 }
1243 }
1244
1245
1246 for(i=0;i<seqnum;i++)
1247 for(j=0;j<seqnum;j++)
1248 {
1249 if( (open_pos[i][j] =
1250 (int * ) calloc( ( seqlen[i]+2) , sizeof(int) ) ) == NULL)
1251 {
1252 printf("Problems with memory allocation for open_pos\n");
1253 embExitBad();
1254 }
1255 }
1256
1257 for( i = 0 ; i <seqnum ; i++)
1258 for( j = 0 ; j <seqnum ; j++)
1259 for( ii = 1 ; ii <= seqlen[i] ; ii++)
1260 open_pos[i][j][ii] = 1;
1261
1262
1263 /**************************************
1264 * *
1265 * definition of `amino' *
1266 * *
1267 **************************************/
1268
1269
1270
1271
1272 if( wgt_type > 1 )
1273 for(hv=0;hv<seqnum;hv++)
1274 for(ii=1;ii<=seqlen[hv]-2;ii++)
1275 {
1276 if(edialign_translate(seq[hv][ii],seq[hv][ii+1],
1277 seq[hv][ii+2],hv,
1278 ii ) == -1)
1279 embExitBad();
1280
1281
1282 amino[hv][ii] = edialign_translate(seq[hv][ii],
1283 seq[hv][ii+1],
1284 seq[hv][ii+2],hv,ii);
1285
1286 if( crick_strand )
1287 {
1288 nuc1 = edialign_invert( seq[hv][ii+2] );
1289 nuc2 = edialign_invert( seq[hv][ii+1] );
1290 nuc3 = edialign_invert( seq[hv][ii] );
1291
1292 amino_c[hv][ii] = edialign_translate( nuc1 , nuc2 , nuc3 ,
1293 hv , ii);
1294 }
1295 }
1296
1297
1298 if( wgt_type == 0 )
1299 for(hv=0;hv<seqnum;hv++)
1300 for(ii=1;ii<=seqlen[hv];ii++)
1301 {
1302 if( seq[hv][ii] == 'C' ) amino[hv][ii] = 1;
1303 if( seq[hv][ii] == 'S' ) amino[hv][ii] = 2;
1304 if( seq[hv][ii] == 'T' ) amino[hv][ii] = 3;
1305 if( seq[hv][ii] == 'P' ) amino[hv][ii] = 4;
1306 if( seq[hv][ii] == 'A' ) amino[hv][ii] = 5;
1307 if( seq[hv][ii] == 'G' ) amino[hv][ii] = 6;
1308 if( seq[hv][ii] == 'N' ) amino[hv][ii] = 7;
1309 if( seq[hv][ii] == 'D' ) amino[hv][ii] = 8;
1310 if( seq[hv][ii] == 'E' ) amino[hv][ii] = 9;
1311 if( seq[hv][ii] == 'Q' ) amino[hv][ii] = 10;
1312 if( seq[hv][ii] == 'H' ) amino[hv][ii] = 11;
1313 if( seq[hv][ii] == 'R' ) amino[hv][ii] = 12;
1314 if( seq[hv][ii] == 'K' ) amino[hv][ii] = 13;
1315 if( seq[hv][ii] == 'M' ) amino[hv][ii] = 14;
1316 if( seq[hv][ii] == 'I' ) amino[hv][ii] = 15;
1317 if( seq[hv][ii] == 'L' ) amino[hv][ii] = 16;
1318 if( seq[hv][ii] == 'V' ) amino[hv][ii] = 17;
1319 if( seq[hv][ii] == 'F' ) amino[hv][ii] = 18;
1320 if( seq[hv][ii] == 'Y' ) amino[hv][ii] = 19;
1321 if( seq[hv][ii] == 'W' ) amino[hv][ii] = 20;
1322 }
1323
1324
1325
1326 amino_acid[0] = 'X';
1327 amino_acid[1] = 'C';
1328 amino_acid[2] = 'S';
1329 amino_acid[3] = 'T';
1330 amino_acid[4] = 'P';
1331 amino_acid[5] = 'A';
1332 amino_acid[6] = 'G';
1333 amino_acid[7] = 'N';
1334 amino_acid[8] = 'D';
1335 amino_acid[9] = 'E';
1336 amino_acid[10] = 'Q';
1337 amino_acid[11] = 'H';
1338 amino_acid[12] = 'R';
1339 amino_acid[13] = 'K';
1340 amino_acid[14] = 'M';
1341 amino_acid[15] = 'I';
1342 amino_acid[16] = 'L';
1343 amino_acid[17] = 'V';
1344 amino_acid[18] = 'F';
1345 amino_acid[19] = 'Y';
1346 amino_acid[20] = 'W';
1347
1348
1349
1350 num_dia_anc = anc_num * (seqnum-1);
1351
1352
1353 if ( anchors )
1354 {
1355 if( time_stamps )
1356 beg_ts = clock() ;
1357
1358 if ( nas == 0 )
1359 {
1360 if( bubblesort )
1361 edialign_bubble_sort ( anc_num , anchor_frg ) ;
1362 else
1363 edialign_frag_sort ( anc_num , anchor_frg , 0 ) ;
1364 }
1365
1366 if( time_stamps)
1367 {
1368 end_ts = clock() ;
1369 time_diff_srt = (float) ( end_ts - beg_ts ) / CLOCKS_PER_SEC ;
1370 if( time_stamps )
1371 printf (" for anc: time_diff_srt = %f \n", time_diff_srt );
1372 }
1373
1374
1375 edialign_filter( &anc_num , anchor_frg);
1376 /* embExitBad() ;
1377 */
1378 }
1379
1380
1381 if(long_output)
1382 {
1383 fp_log = fopen(logname,"w");
1384 fprintf(fp_log,"\n # %s \n\n " , input_line );
1385 }
1386
1387 if(frg_mult_file)
1388 {
1389 fp_fsm = fopen(fsm_name,"w");
1390 fprintf(fp_fsm,"\n # %s \n\n" , input_line );
1391 }
1392
1393
1394
1395
1396
1397 if(
1398 (num_dia_bf = (int *) calloc( ( max_itnum + 1 ) , sizeof(int)))
1399 == NULL
1400 )
1401 {
1402 printf(" problems with memory allocation for `num_dia_bf' ! \n \n");
1403 embExitBad();
1404 }
1405
1406
1407 if(
1408 (num_dia_af = (int *) calloc( ( max_itnum + 1 ) , sizeof(int)))
1409 == NULL
1410 )
1411 {
1412 printf(" problems with memory allocation for `num_dia_af' ! \n \n");
1413 embExitBad();
1414 }
1415
1416
1417 all_it_dia = (struct multi_frag *) calloc(1, sizeof(struct multi_frag));
1418 current_dia = all_it_dia;
1419
1420
1421 strcpy(itname,printname);
1422 strcpy(itname2,printname);
1423 strcpy(itname3,printname);
1424 strcpy(itname4,printname);
1425 sprintf(str,".ali");
1426
1427 if( default_name )
1428 strcat(itname,str);
1429
1430 sprintf(str,".fa");
1431 strcat(itname2,str);
1432
1433
1434 if( msf_file )
1435 strcat(itname3,".ms");
1436
1437
1438 if( cw_file )
1439 strcat(itname4,".cw");
1440
1441 if( textual_alignment )
1442 {
1443 /*
1444 ajStrAssignS(&tnstr,ajFileGetPrintnameS(outfile));
1445 ajFileClose(&outfile);
1446
1447 fp_ali = fopen(ajStrGetPtr(tnstr),"wb");
1448 */
1449 fp_ali = ajFileGetFileptr(outfile);
1450
1451 /* fp_ali = fopen(itname,"w"); */
1452 }
1453
1454
1455 if( standard_out )
1456 fp_ali = stdout;
1457
1458 /* Sequence output A */
1459 fasta_file = 1;
1460
1461 /*
1462 if( textual_alignment )
1463 if(fasta_file)
1464 fp2 = fopen(itname2,"w");
1465 */
1466
1467 if(msf_file)
1468 fp3 = fopen(itname3,"w");
1469
1470 if(cw_file)
1471 fp4 = fopen(itname4,"w");
1472
1473 if( textual_alignment )
1474 edialign_para_print(seq_file , fp_ali);
1475
1476
1477 /***************************\
1478 * *
1479 * ITERATION START *
1480 * *
1481 \***************************/
1482
1483 istep = 0 ;
1484 while( ( cont_it == 1 ) && ( istep < max_itnum ) )
1485 {
1486 cont_it = 0 ;
1487 istep++ ;
1488
1489 /* printf("\n istep = %d \n", istep ); */
1490
1491
1492 this_it_dia = current_dia;
1493
1494 strcpy(itname,printname);
1495 strcpy(itname2,printname);
1496 strcpy(itname3,printname);
1497 strcpy(itname4,printname);
1498 sprintf(str,".ali");
1499
1500
1501 if( default_name )
1502 strcat(itname,str);
1503
1504 sprintf(str,".fa");
1505 strcat(itname2,str);
1506
1507 if( msf_file )
1508 strcat(itname3,".ms");
1509
1510
1511 if( cw_file )
1512 strcat(itname4,".cw");
1513
1514 weight_sum_af = 0;
1515 num_dia_bf[ istep ] = 0;
1516
1517 if( time_stamps )
1518 beg_pa = clock();
1519
1520
1521 if( ref_seq == 0 )
1522 i_max = seqnum ;
1523 else
1524 i_max = 1 ;
1525
1526 for(i = 0 ; i < i_max ; i++)
1527 {
1528 for(j = i + 1 ; j < seqnum ; j++)
1529 {
1530 /****************************************\
1531 * *
1532 * PAIRWISE ALIGNMENT *
1533 * *
1534 \****************************************/
1535
1536 if( cont_it_p[ i ][ j] )
1537 {
1538
1539 /*
1540 printf("\n out of frc it %d : wgt 20 = %f \n", istep ,
1541 wgt_dna[ 20 ][ 20 ] ) ;
1542 */
1543 score = edialign_frag_chain(i,j,fp_ali,fp_mot,&num_dia_p);
1544 }
1545 else
1546 {
1547 score = 0 ;
1548 num_dia_p = 0 ;
1549 }
1550
1551 if( istep == 1 )
1552 {
1553 pair_score[j][i] = score;
1554 pair_score[i][j] = score;
1555 }
1556
1557
1558 for(k=0;k<num_dia_p;k++)
1559 {
1560 *current_dia = pair_dia[k];
1561
1562 current_dia->next
1563 = (struct multi_frag *) calloc(1,
1564 sizeof(struct multi_frag));
1565 end_dia = current_dia;
1566 current_dia = current_dia->next;
1567 current_dia->pred = end_dia;
1568 }
1569
1570 num_dia_bf[ istep ] = num_dia_bf[ istep ] + num_dia_p;
1571
1572 for(hv=0; hv<num_dia_p;hv++)
1573 weight_sum_af = weight_sum_af + (pair_dia[hv]).weight;
1574
1575
1576 if(num_dia_p)
1577 free(pair_dia);
1578
1579 } /* for(j = i+1 ; j<seqnum ; j++) */
1580
1581 } /* for(i = 0 ; i<seqnum ; i++) */
1582
1583
1584 if( time_stamps )
1585 {
1586 end_pa = clock();
1587
1588 time_diff_pa = (float) ( end_pa - beg_pa ) / CLOCKS_PER_SEC ;
1589 if( time_stamps )
1590 printf (" time_diff_pa = %f \n", time_diff_pa );
1591 total_pa_time = total_pa_time + time_diff_pa;
1592 }
1593
1594
1595
1596
1597 if( break1 )
1598 {
1599 printf("\n break1\n");
1600 embExitBad() ;
1601 }
1602
1603
1604 /*
1605 if( pa_only )
1606 {
1607 printf("\n\n istep = %d, pa finished - exit \n\n", istep );
1608 embExitBad();
1609 }
1610 */
1611
1612 if(overlap_weights)
1613 {
1614 diagonal1 = this_it_dia;
1615 dia_counter = 0;
1616
1617 if( diagonal1 != NULL )
1618 while( diagonal1->next != NULL )
1619 {
1620 dia_counter++;
1621 if( print_status )
1622 if( ( dia_counter % 100 ) == 0 )
1623 {
1624 fp_st = fopen( pst_name ,"w");
1625
1626 fprintf(fp_st," dsd %s \n", input_line);
1627 fprintf(fp_st,"\n\n\n Status of the program "
1628 "run:\n");
1629 fprintf(fp_st," =========================="
1630 "\n\n");
1631 if( seqnum > 2 ) {
1632 fprintf(fp_st," iteration step %d in ",
1633 istep);
1634 fprintf(fp_st,"multiple alignment\n" );
1635 }
1636 fprintf(fp_st," calculating overlap weight "
1637 "for diagonals\n");
1638 fprintf(fp_st," current diagonal = %d\n\n",
1639 dia_counter );
1640 fprintf(fp_st," total number of");
1641 fprintf(fp_st," diagonals: %d\n\n\n\n",
1642 num_dia_bf[ istep ]);
1643 fclose(fp_st);
1644 }
1645
1646 diagonal2 = diagonal1->next;
1647
1648 while(diagonal2->next != NULL)
1649 {
1650 if( diagonal1->trans == diagonal2->trans )
1651 edialign_ow_add(diagonal1 , diagonal2);
1652 diagonal2 = diagonal2->next;
1653 }
1654 diagonal1 = diagonal1->next;
1655 }
1656
1657 if(bubblesort)
1658 edialign_ow_bubble_sort(num_dia_bf[istep],this_it_dia);
1659 else
1660 edialign_frag_sort(num_dia_bf[istep],this_it_dia,
1661 overlap_weights);
1662 }
1663 else /* no overlap_weights */
1664 {
1665 beg_ts = clock() ;
1666
1667 if( bubblesort )
1668 edialign_bubble_sort( num_dia_bf[ istep ] , this_it_dia );
1669 else
1670 edialign_frag_sort(num_dia_bf[istep],this_it_dia,
1671 overlap_weights);
1672
1673 end_ts = clock() ;
1674 time_diff_srt = (float) ( end_ts - beg_ts ) / CLOCKS_PER_SEC ;
1675 if( time_stamps )
1676 printf (" time_diff_srt = %f \n", time_diff_srt );
1677 }
1678
1679
1680 num_dia_af[ istep ] = num_dia_bf[ istep ];
1681 weight_sum_bf = weight_sum_af;
1682
1683 pairalignsum = 0;
1684 pairalignlen = 0;
1685
1686
1687 edialign_filter( num_dia_af + istep , this_it_dia);
1688 num_all_it_dia = num_all_it_dia + num_dia_af[ istep ];
1689
1690
1691 /*
1692 if( pa_only == 0 )
1693 {
1694 printf("\n\n istep = %d, filter finished - exit \n\n", istep );
1695 embExitBad();
1696 }
1697 */
1698
1699
1700 weight_sum_af = 0;
1701
1702 edialign_print_log( this_it_dia , fp_log , fp_fsm );
1703
1704 if( frag_file )
1705 edialign_print_fragments( this_it_dia , fp_frg );
1706
1707 edialign_throw_out( &weight_sum_af );
1708
1709 edialign_sel_test( );
1710
1711 threshold = threshold ;
1712
1713 if( break2 )
1714 {
1715 printf("\n break2\n");
1716 embExitBad() ;
1717 }
1718
1719
1720 } /* while ( cond_it == 1 ) */
1721
1722
1723 /***************************\
1724 * *
1725 * ITERATION END *
1726 * *
1727 \***************************/
1728
1729 strcpy( dist_name , printname);
1730 strcat(dist_name , ".dst");
1731
1732 if ( ref_seq == 0 )
1733 edialign_av_tree_print();
1734
1735 if( standard_out )
1736 fp_ali = stdout;
1737
1738 if(sf_mat)
1739 {
1740 edialign_subst_mat(input_name , num_all_it_dia , all_it_dia);
1741 }
1742
1743
1744 if( textual_alignment )
1745 edialign_ali_arrange(num_all_it_dia,all_it_dia,fp_ali,seqout,fp3,fp4,
1746 fp_csc,isprot);
1747
1748
1749 if(long_output)
1750 {
1751 /* fprintf(fp_log "\n\n thr = %f , lmax = %d , speed = %f */
1752 fprintf(fp_log, "\n\n total sum of weights: %f \n\n\n", tot_weight);
1753 fclose(fp_log);
1754 }
1755
1756
1757
1758 /*
1759 if( argnum == 1 )
1760 {
1761 printf("\n Program terminated normally\n");
1762 printf(" Results are contained in file `%s' \n \n \n", itname);
1763 }
1764 */
1765
1766 av_dia_num = (float) (2 * dia_num);
1767 av_dia_num = av_dia_num / ( seqnum * ( seqnum - 1) ) ;
1768
1769 av_max_dia_num = (float) (2 * max_dia_num);
1770 av_max_dia_num = av_max_dia_num / ( seqnum * ( seqnum - 1) ) ;
1771
1772
1773
1774 tmpi1 = (ajint) av_dia_num ;
1775 tmpi2 = (ajint) av_max_dia_num ;
1776
1777 if(pr_av_nd)
1778 printf(" %d ", tmpi1 );
1779
1780 if(pr_av_max_nd)
1781 printf(" %d ", tmpi2 );
1782
1783
1784
1785 if(pr_av_nd)
1786 fprintf(fp_ali, " %d fragments considered for alignment \n",
1787 tmpi1 );
1788
1789 if(pr_av_max_nd)
1790 fprintf(fp_ali, " %d fragments simultaneously stored \n\n", tmpi2 );
1791
1792 if( textual_alignment )
1793 {
1794 /* fclose(fp_ali); */
1795 ajFileClose(&outfile);
1796 }
1797
1798
1799
1800 if( time_stamps )
1801 {
1802 end_ali = clock() ;
1803 time_diff_ali = (float) ( end_ali - beg_ali ) / CLOCKS_PER_SEC ;
1804
1805 perc_pa_time = total_pa_time / time_diff_ali * 100 ;
1806 printf (" time_diff_ali = %f \n", time_diff_ali );
1807 printf (" total_pa_time = %f \n", total_pa_time );
1808 printf (" corresponds to %f percent \n\n", perc_pa_time );
1809 }
1810
1811
1812 for(i=0;i<seqnum;i++)
1813 {
1814 for(j=0;j<seqnum;j++)
1815 {
1816 if(gl_exclude_list)
1817 AJFREE(gl_exclude_list[i][j]);
1818 if(open_pos)
1819 AJFREE(open_pos[i][j]);
1820 }
1821
1822 if(gl_exclude_list) AJFREE(gl_exclude_list[i]);
1823 if(open_pos) AJFREE(open_pos[i]);
1824 if(pair_score) AJFREE(pair_score[i]);
1825 if(cont_it_p) AJFREE(cont_it_p[i]);
1826 if(glob_sim) AJFREE(glob_sim[i]);
1827 if(amino) AJFREE(amino[i]);
1828 if(amino_c) AJFREE(amino_c[i]);
1829 }
1830 AJFREE(gl_exclude_list);
1831 AJFREE(open_pos);
1832 AJFREE(pair_score);
1833 AJFREE(cont_it_p);
1834 AJFREE(glob_sim);
1835 AJFREE(amino);
1836 AJFREE(amino_c);
1837
1838 ajStrDel(&tnstr);
1839 ajSeqsetDel(&seqset);
1840 ajSeqoutDel(&seqout);
1841 ajStrDel(&nucmode);
1842 ajStrDel(&overlapw);
1843 ajStrDel(&linkage);
1844 ajFileClose(&outfile);
1845
1846 for(ii=1;ii<max_dia+1;ii++)
1847 {
1848 if(tp400_prot)AJFREE(tp400_prot[ii]);
1849 if(tp400_dna)AJFREE(tp400_dna[ii]);
1850 if(tp400_trans)AJFREE(tp400_trans[ii]);
1851 if(wgt_prot)AJFREE(wgt_prot[ii]);
1852 if(wgt_dna)AJFREE(wgt_dna[ii]);
1853 if(wgt_trans)AJFREE(wgt_trans[ii]);
1854 if(min_weight)AJFREE(min_weight[ii]);
1855 }
1856
1857 AJFREE(tp400_prot);
1858 AJFREE(tp400_dna);
1859 AJFREE(tp400_trans);
1860 AJFREE(wgt_prot);
1861 AJFREE(wgt_dna);
1862 AJFREE(wgt_trans);
1863 AJFREE(min_weight);
1864
1865 for(i=0;i<n;i++)
1866 {
1867 if(seq) AJFREE(seq[i]);
1868 if(seq_name) AJFREE(seq_name[i]);
1869 if(full_name) AJFREE(full_name[i]);
1870 }
1871
1872 AJFREE(seq);
1873 AJFREE(seq_name);
1874 AJFREE(full_name);
1875 AJFREE(seqlen);
1876
1877 if(arguments)
1878 {
1879 for(i=0;i<argnum;i++)
1880 AJFREE(arguments[i]);
1881 AJFREE(arguments);
1882 }
1883
1884 AJFREE(par_file);
1885 AJFREE(upg_str);
1886 AJFREE(num_dia_bf);
1887 AJFREE(num_dia_af);
1888 while(all_it_dia)
1889 {
1890 next_dia = all_it_dia->next;
1891 AJFREE(all_it_dia);
1892 all_it_dia = next_dia;
1893 }
1894
1895 edialign_freeAligGraphClosure(gabiosclos);
1896
1897 embExit();
1898
1899 return 0;
1900 } /* main */
1901
1902
1903
1904
1905 /* @funcstatic edialign_computeClosure ***************************************
1906 **
1907 ** edialign_computeClosure
1908 **
1909 ** @param [w] clos [edialignCLOSURE*] Closure
1910 ** @return [void]
1911 *****************************************************************************/
1912
edialign_computeClosure(edialignCLOSURE * clos)1913 static void edialign_computeClosure(edialignCLOSURE *clos)
1914 {
1915 ajint **Succ;
1916 ajint **Pred;
1917 ajint *NSucc;
1918 ajint *NPred;
1919 ajint *npred;
1920 ajint nsucc;
1921 ajint ni;
1922 ajint nj;
1923 ajint s;
1924 ajint top;
1925 ajint bottom;
1926 ajint n0;
1927 ajint p;
1928 ajint n;
1929 ajint i;
1930 ajint k;
1931 ajint pos_n;
1932 ajint x;
1933
1934 Succ = (ajint **) edialign_callouer_mat(sizeof(ajint), clos->nbrAligSets+2,
1935 clos->seqNbr);
1936 Pred = (ajint **) edialign_callouer_mat(sizeof(int), clos->nbrAligSets+2,
1937 clos->seqNbr);
1938 NSucc = (ajint *) edialign_allouer((clos->nbrAligSets+2) * sizeof(ajint));
1939 NPred = (ajint *) edialign_allouer((clos->nbrAligSets+2) * sizeof(ajint));
1940 npred = (ajint *) edialign_allouer((clos->nbrAligSets+2) * sizeof(ajint));
1941 clos->topolog = (ajint *) edialign_allouer(sizeof(int));
1942
1943 /* C A L C U L des Succ[n][x] et NPred[n] */
1944
1945 for(n=1; n <= clos->nbrAligSets; n++)
1946 NPred[n] = 0;
1947
1948 for(n=1; n <= clos->nbrAligSets; n++)
1949 {
1950 nsucc = 0;
1951 for(x=0; x < clos->seqNbr; x++)
1952 if(clos->aligSet[n].pos[x] > 0)
1953 {
1954 pos_n = clos->aligSet[n].pos[x];
1955
1956 for(i=pos_n+1; i <= clos->seq[x].longueur &&
1957 clos->seq[x].aligSetNbr[i] == 0; i++)
1958 clos->seq[x].predAligSetPos[i] = pos_n;
1959
1960 if(i <= clos->seq[x].longueur)
1961 {
1962 clos->seq[x].predAligSetPos[i] = pos_n;
1963 if(clos->aligSet[clos->seq[x].aligSetNbr[i]].nbr > 0)
1964 {
1965 n0 = Succ[n][nsucc] = clos->seq[x].aligSetNbr[i];
1966 clos->aligSet[n0].nbr = - clos->aligSet[n0].nbr;
1967 nsucc++;
1968 }
1969 }
1970
1971 for(i=pos_n-1; i > 0 && clos->seq[x].aligSetNbr[i] == 0; i--)
1972 clos->seq[x].succAligSetPos[i] = pos_n;
1973
1974 if(i > 0)
1975 clos->seq[x].succAligSetPos[i] = pos_n;
1976 }
1977
1978 for(p=0; p < nsucc; p++)
1979 {
1980 n0 = Succ[n][p];
1981 Pred[n0][NPred[n0]] = n;
1982 NPred[n0]++;
1983
1984 clos->aligSet[n0].nbr = - clos->aligSet[n0].nbr;
1985 }
1986 NSucc[n] = nsucc;
1987 }
1988
1989 /* C A L C U L de clos->topolog */
1990
1991 clos->topolog = (ajint *) edialign_reallouer(clos->topolog,
1992 (clos->nbrAligSets+2) *
1993 sizeof(ajint));
1994
1995 bottom = top = 0;
1996
1997 for(n=1; n <= clos->nbrAligSets; n++)
1998 {
1999 npred[n] = NPred[n];
2000 if (npred[n] == 0)
2001 {
2002 top++;
2003 clos->topolog[top] = n;
2004 }
2005 }
2006
2007
2008 while( bottom != top)
2009 {
2010 bottom++;
2011 ni = clos->topolog[bottom];
2012 for(s=0; s < NSucc[ni]; s++)
2013 {
2014 nj = Succ[ni][s];
2015 npred[nj]--;
2016 if(npred[nj] == 0)
2017 {
2018 top++;
2019 clos->topolog[top] = nj;
2020 }
2021 }
2022 }
2023
2024
2025 for(x=0; x < clos->seqNbr; x++)
2026 {
2027 clos->predFrontier[0][x] = 0;
2028 clos->succFrontier[clos->nbrAligSets+1][x] = clos->seq[x].longueur+1;
2029 }
2030
2031
2032 for(k=1; k <= clos->nbrAligSets; k++)
2033 {
2034 n0 = clos->topolog[k];
2035 for(x=0; x < clos->seqNbr; x++)
2036 {
2037 if(clos->aligSet[n0].pos[x] > 0)
2038 clos->predFrontier[n0][x] = clos->aligSet[n0].pos[x];
2039 else
2040 for(p=0, clos->predFrontier[n0][x]=0; p < NPred[n0]; p++)
2041 {
2042 n = Pred[n0][p];
2043 if (clos->predFrontier[n][x] > clos->predFrontier[n0][x])
2044 clos->predFrontier[n0][x] = clos->predFrontier[n][x];
2045 }
2046 }
2047 }
2048
2049
2050 for(k=clos->nbrAligSets; k > 0; k--)
2051 {
2052 n0 = clos->topolog[k];
2053 for(x=0; x < clos->seqNbr; x++)
2054 {
2055 if(clos->aligSet[n0].pos[x] > 0)
2056 clos->succFrontier[n0][x] = clos->aligSet[n0].pos[x];
2057 else
2058 for(p=0, clos->succFrontier[n0][x]=clos->seq[x].longueur+1;
2059 p < NSucc[n0]; p++)
2060 {
2061 n = Succ[n0][p];
2062 if(clos->succFrontier[n][x] < clos->succFrontier[n0][x])
2063 clos->succFrontier[n0][x] = clos->succFrontier[n][x];
2064 }
2065 }
2066 }
2067
2068 edialign_liberer(npred);
2069 edialign_liberer(NPred);
2070 edialign_liberer(NSucc);
2071 edialign_liberer_mat((void **) Pred, clos->nbrAligSets+2);
2072 edialign_liberer_mat((void **) Succ, clos->nbrAligSets+2);
2073 edialign_liberer(clos->topolog);
2074
2075 return;
2076 }
2077
2078
2079
2080
2081 /* @funcstatic edialign_moveAligSet ******************************************
2082 **
2083 ** edialign_moveAligSet
2084 **
2085 ** @param [w] clos [edialignCLOSURE*] Closure
2086 ** @param [r] n1 [ajint] Undocumented
2087 ** @param [r] n2 [ajint] Undocumented
2088 ** @return [void]
2089 *****************************************************************************/
2090
edialign_moveAligSet(edialignCLOSURE * clos,ajint n1,ajint n2)2091 static void edialign_moveAligSet(edialignCLOSURE *clos, ajint n1, ajint n2)
2092 {
2093 ajint x;
2094 ajint k;
2095
2096 for(x=0; x < clos->seqNbr; x++)
2097 {
2098 k = clos->aligSet[n1].pos[x] = clos->aligSet[n2].pos[x];
2099 if(k > 0)
2100 clos->seq[x].aligSetNbr[k] = n1;
2101
2102 clos->predFrontier[n1][x] = clos->predFrontier[n2][x];
2103 clos->succFrontier[n1][x] = clos->succFrontier[n2][x];
2104 }
2105
2106 clos->aligSet[n1].nbr = clos->aligSet[n2].nbr;
2107
2108 return;
2109 }
2110
2111
2112
2113
2114 /* @funcstatic edialign_read_closure ****************************************
2115 **
2116 ** edialign_read_closure
2117 **
2118 ** @param [w] clos [edialignCLOSURE*] Closure
2119 ** @param [r] nbreancr [ajint] Undocumented
2120 ** @param [w] ancrages [ajint**] Undocumented
2121 ** @return [void]
2122 *****************************************************************************/
2123
edialign_read_closure(edialignCLOSURE * clos,ajint nbreancr,ajint ** ancrages)2124 static void edialign_read_closure(edialignCLOSURE *clos, ajint nbreancr,
2125 ajint **ancrages)
2126 {
2127 ajint x;
2128 ajint ind;
2129 ajint n;
2130
2131 for(n=0; n < nbreancr; n++)
2132 {
2133 clos->nbrAligSets++;
2134 edialign_realloc_closure(clos);
2135
2136 clos->aligSet[clos->nbrAligSets].nbr = 0;
2137 for(x=0; x < clos->seqNbr; x++)
2138 {
2139 ind = clos->aligSet[clos->nbrAligSets].pos[x] = ancrages[n][x];
2140 if(ind > 0)
2141 {
2142 clos->aligSet[clos->nbrAligSets].nbr++;
2143 clos->seq[x].aligSetNbr[ind] = clos->nbrAligSets;
2144 }
2145 }
2146 }
2147
2148 edialign_computeClosure(clos);
2149 }
2150
2151
2152
2153
2154 /* @funcstatic edialign_init_closure ****************************************
2155 **
2156 ** edialign_init_closure
2157 **
2158 ** @param [w] clos [edialignCLOSURE*] Closure
2159 ** @param [r] nbreancr [ajint] Undocumented
2160 ** @param [w] ancrages [ajint**] Undocumented
2161 ** @return [void]
2162 *****************************************************************************/
2163
edialign_init_closure(edialignCLOSURE * clos,ajint nbreancr,ajint ** ancrages)2164 static void edialign_init_closure(edialignCLOSURE *clos, ajint nbreancr,
2165 ajint **ancrages)
2166 {
2167 ajint x;
2168 ajint i;
2169 ajint *longsequ;
2170
2171 longsequ = (ajint *) edialign_allouer(clos->seqNbr * sizeof(ajint));
2172
2173 for(x=0; x < clos->seqNbr; x++)
2174 {
2175 longsequ[x] = clos->seq[x].longueur;
2176 for(i=1; i <= clos->seq[x].longueur; i++)
2177 clos->seq[x].aligSetNbr[i] = clos->seq[x].succAligSetPos[i]
2178 = clos->seq[x].predAligSetPos[i] = 0;
2179 }
2180
2181 clos->nbrAligSets = 0;
2182
2183 if(nbreancr > 0)
2184 edialign_read_closure(clos, nbreancr, ancrages);
2185
2186 for(x=0; x < clos->seqNbr; x++)
2187 clos->seq[x].longueur = longsequ[x];
2188
2189 edialign_liberer(longsequ);
2190
2191 return;
2192 }
2193
2194
2195
2196
2197 /* @funcstatic edialign_alloc_closure ****************************************
2198 **
2199 ** edialign_alloc_closure
2200 **
2201 ** @param [w] clos [edialignCLOSURE*] Closure
2202 ** @return [void]
2203 *****************************************************************************/
2204
edialign_alloc_closure(edialignCLOSURE * clos)2205 static void edialign_alloc_closure(edialignCLOSURE *clos)
2206 {
2207 ajlong na;
2208 ajint x;
2209
2210 /* sera re'alloue' */
2211 clos->predFrontier = (ajint **) edialign_callouer_mat(sizeof(int),
2212 clos->maxLong+2,
2213 clos->seqNbr+1);
2214
2215 /* sera re'alloue' */
2216 clos->succFrontier = (ajint **) edialign_callouer_mat(sizeof(int),
2217 clos->maxLong+2,
2218 clos->seqNbr+1);
2219
2220 /* sera re'alloue' */
2221 clos->aligSet =
2222 (edialignPositionSet *) edialign_allouer((clos->maxLong+2) *
2223 sizeof(edialignPositionSet));
2224
2225 for(na=0; na <= clos->maxLong+1; na++)
2226 clos->aligSet[na].pos = (ajint *) edialign_allouer(clos->seqNbr *
2227 sizeof(ajint));
2228
2229 clos->oldNbrAligSets = clos->maxLong;
2230
2231 for (x=0; x < clos->seqNbr; x++)
2232 {
2233 clos->seq[x].aligSetNbr = (ajint *) edialign_allouer((clos->seq[x].
2234 longueur+2)*
2235 sizeof(ajint));
2236 clos->seq[x].predAligSetPos = (ajint *)
2237 edialign_allouer((clos->seq[x].longueur+2)*sizeof(ajint));
2238 clos->seq[x].succAligSetPos = (ajint *)
2239 edialign_allouer((clos->seq[x].longueur+2)*sizeof(ajint));
2240 }
2241
2242 clos->gauche1 = (ajint *) edialign_allouer(clos->seqNbr * sizeof(ajint));
2243 clos->gauche2 = (ajint *) edialign_allouer(clos->seqNbr * sizeof(ajint));
2244 clos->droite1 = (ajint *) edialign_allouer(clos->seqNbr * sizeof(ajint));
2245 clos->droite2 = (ajint *) edialign_allouer(clos->seqNbr * sizeof(ajint));
2246 clos->pos_ = (ajint **) edialign_callouer_mat(sizeof(ajint), clos->seqNbr,
2247 clos->seqNbr);
2248
2249 return;
2250 }
2251
2252
2253
2254
2255 /* @funcstatic edialign_free_closure ****************************************
2256 **
2257 ** edialign_free_closure. Unused.
2258 **
2259 ** @param [w] clos [edialignCLOSURE*] Closure
2260 ** @return [void]
2261 *****************************************************************************/
2262
edialign_free_closure(edialignCLOSURE * clos)2263 static void edialign_free_closure(edialignCLOSURE *clos)
2264 {
2265 ajlong na;
2266 ajint x;
2267
2268 edialign_liberer(clos->gauche1);
2269 edialign_liberer(clos->gauche2);
2270 edialign_liberer(clos->droite1);
2271 edialign_liberer(clos->droite2);
2272 edialign_liberer_mat((void **) clos->pos_, clos->seqNbr);
2273
2274 edialign_liberer_mat((void **) clos->succFrontier,clos->oldNbrAligSets+2);
2275 edialign_liberer_mat((void **) clos->predFrontier,clos->oldNbrAligSets+2);
2276
2277 for(x=0; x < clos->seqNbr; x++)
2278 {
2279 edialign_liberer(clos->seq[x].aligSetNbr);
2280 edialign_liberer(clos->seq[x].predAligSetPos);
2281 edialign_liberer(clos->seq[x].succAligSetPos);
2282 }
2283
2284 for(na=0; na <= clos->oldNbrAligSets+1; na++)
2285 {
2286 edialign_liberer(clos->aligSet[na].pos);
2287 }
2288 edialign_liberer(clos->aligSet);
2289
2290 return;
2291 }
2292
2293
2294
2295
2296 /* @funcstatic edialign_realloc_closure **************************************
2297 **
2298 ** edialign_realloc_closure
2299 **
2300 ** @param [w] clos [edialignCLOSURE*] Closure
2301 ** @return [void]
2302 *****************************************************************************/
2303
edialign_realloc_closure(edialignCLOSURE * clos)2304 static void edialign_realloc_closure(edialignCLOSURE *clos)
2305 {
2306 ajint na;
2307
2308 if(clos->nbrAligSets > clos->oldNbrAligSets)
2309 {
2310 clos->predFrontier = (ajint **)
2311 edialign_recallouer_mat((void **) clos->predFrontier,
2312 sizeof(ajint),
2313 clos->oldNbrAligSets+2,
2314 clos->nbrAligSets+2,
2315 clos->seqNbr+1);
2316
2317 clos->succFrontier = (ajint **)
2318 edialign_recallouer_mat((void **) clos->succFrontier,
2319 sizeof(ajint),
2320 clos->oldNbrAligSets+2,
2321 clos->nbrAligSets+2,
2322 clos->seqNbr+1);
2323
2324 clos->aligSet = (edialignPositionSet *)
2325 edialign_reallouer(clos->aligSet, (clos->nbrAligSets+2) *
2326 sizeof(edialignPositionSet));
2327
2328 for(na=clos->oldNbrAligSets+2; na <= clos->nbrAligSets+1; na++)
2329 {
2330 clos->aligSet[na].pos = (ajint *)
2331 edialign_allouer(clos->seqNbr * sizeof(ajint));
2332 }
2333 clos->oldNbrAligSets = clos->nbrAligSets;
2334 }
2335
2336 return;
2337 }
2338
2339
2340
2341
#if 0
/* @funcstatic edialign_print_aligSets **************************************
**
** Debugging aid, compiled out. Prints to stdout the alignment set holding
** position i of sequence nseq, followed by a left and a right frontier row.
**
** When the position belongs to no set (set number 0), the frontier rows are
** taken from the sets found through predAligSetPos and succAligSetPos.
**
** @param [w] clos [edialignCLOSURE*] Closure
** @param [r] nseq [ajint] Sequence index
** @param [r] i [ajint] Position within sequence nseq
** @return [ajint] Always 0
*****************************************************************************/

static ajint edialign_print_aligSets(edialignCLOSURE *clos,
                                     ajint nseq, ajint i)
{
    ajint set;
    ajint left;
    ajint right;
    ajint nbr;
    ajint s;

    set = clos->seq[nseq].aligSetNbr[i];
    left = set;
    right = set;

    if(set == 0)
    {
        nbr = clos->seq[nseq].predAligSetPos[i];
        if(nbr > 0)
            left = clos->seq[nseq].aligSetNbr[nbr];

        nbr = clos->seq[nseq].succAligSetPos[i];
        if(nbr > 0)
            right = clos->seq[nseq].aligSetNbr[nbr];
    }

    printf("echelle %d: ", set);
    if(set != 0)
    {
        s = 0;
        while(s < clos->seqNbr)
        {
            printf("%d ", clos->aligSet[set].pos[s]);
            s++;
        }
    }

    printf("\nfrontiere clos->gauche %d: ", left);
    if(left != 0)
    {
        s = 0;
        while(s < clos->seqNbr)
        {
            printf("%d ", clos->predFrontier[left][s]);
            s++;
        }
    }

    printf("\nfrontiere clos->droite %d: ", right);
    if(right != 0)
    {
        s = 0;
        while(s < clos->seqNbr)
        {
            printf("%d ", clos->succFrontier[right][s]);
            s++;
        }
    }

    printf("\n");

    return 0;
}
#endif
2394
2395
2396
2397
2398 /* @funcstatic edialign_init_seq **************************************
2399 **
2400 ** edialign_init_seq
2401 **
2402 ** @param [w] clos [edialignCLOSURE*] Undocumented
2403 ** @param [r] nbreseq [ajint] Undocumented
2404 ** @param [w] longseq [ajint*] Undocumented
2405 ** @return [void]
2406 *****************************************************************************/
2407
edialign_init_seq(edialignCLOSURE * clos,ajint nbreseq,ajint * longseq)2408 static void edialign_init_seq(edialignCLOSURE *clos,
2409 ajint nbreseq, ajint *longseq)
2410 {
2411 ajint x;
2412
2413 clos->seqNbr = nbreseq;
2414
2415 clos->seq =
2416 (edialignSequence *) edialign_allouer(clos->seqNbr *
2417 sizeof(edialignSequence));
2418
2419 for (x=clos->maxLong=0; x < clos->seqNbr; x++)
2420 {
2421 clos->seq[x].longueur = longseq[x];
2422 if (clos->maxLong < longseq[x])
2423 clos->maxLong = longseq[x];
2424 }
2425
2426 return;
2427 }
2428
2429
2430
2431
2432 /* @funcstatic edialign_desinit_seq **************************************
2433 **
2434 ** edialign_desinit_seq. Unused.
2435 **
2436 ** @param [w] clos [edialignCLOSURE*] Undocumented
2437 ** @return [void]
2438 *****************************************************************************/
2439
edialign_desinit_seq(edialignCLOSURE * clos)2440 static void edialign_desinit_seq(edialignCLOSURE *clos)
2441 {
2442
2443 edialign_liberer(clos->seq);
2444
2445 return;
2446 }
2447
2448
2449
2450
2451 /* @funcstatic edialign_newAligGraphClosure **********************************
2452 **
2453 ** edialign_newAligGraphClosure
2454 **
2455 ** @param [r] nbreseq [ajint] Undocumented
2456 ** @param [w] longseq [ajint*] Undocumented
2457 ** @param [r] nbreancr [ajint] Undocumented
2458 ** @param [w] ancrages [ajint**] Undocumented
2459 ** @return [edialignCLOSURE*] Undocumented
2460 *****************************************************************************/
2461
edialign_newAligGraphClosure(ajint nbreseq,ajint * longseq,ajint nbreancr,ajint ** ancrages)2462 static edialignCLOSURE* edialign_newAligGraphClosure(ajint nbreseq,
2463 ajint *longseq,
2464 ajint nbreancr,
2465 ajint **ancrages)
2466 {
2467
2468 edialignCLOSURE *clos =
2469 (edialignCLOSURE *) edialign_allouer(sizeof(edialignCLOSURE));
2470
2471 edialign_init_seq(clos, nbreseq, longseq);
2472
2473 edialign_alloc_closure(clos); /* utilise clos->maxLong */
2474
2475 edialign_init_closure(clos, nbreancr, ancrages);
2476
2477 return clos;
2478 }
2479
2480
2481
2482
2483 /* @funcstatic edialign_freeAligGraphClosure **********************************
2484 **
2485 ** edialign_freeAligGraphClosure. Unused.
2486 **
2487 ** @param [w] clos [edialignCLOSURE*] Undocumented
2488 ** @return [void]
2489 *****************************************************************************/
2490
edialign_freeAligGraphClosure(edialignCLOSURE * clos)2491 static void edialign_freeAligGraphClosure(edialignCLOSURE *clos)
2492 {
2493 edialign_free_closure(clos);
2494
2495 edialign_desinit_seq(clos);
2496
2497 edialign_liberer(clos);
2498
2499 return;
2500 }
2501
2502
2503
2504
/* @funcstatic edialign_addAlignedPositions **********************************
**
** Records that position i of sequence seq1 and position j of sequence
** seq2 belong to the same alignment set.
**
** If both positions already carry the same non-zero set number nothing
** is changed. Otherwise a merged set is assembled in slot
** clos->nbrAligSets + 1 from the existing sets (if any) of the two
** positions, its predFrontier/succFrontier rows are computed, the
** frontier rows of other sets are updated where the new set imposes a
** stricter bound, the predAligSetPos/succAligSetPos links around
** previously unaligned positions are refreshed, and the set table is
** finally reorganised with edialign_moveAligSet /
** edialign_realloc_closure.
**
** A set number of 0 is treated as "position is in no set" and a
** position of 0 as "no such position" throughout.
**
** @param [w] clos [edialignCLOSURE*] Closure to update
** @param [r] seq1 [ajint] Index of first sequence
** @param [r] i [ajint] Position in first sequence
** @param [r] seq2 [ajint] Index of second sequence
** @param [r] j [ajint] Position in second sequence
** @return [ajint] Always 0
*****************************************************************************/

static ajint edialign_addAlignedPositions(edialignCLOSURE *clos,
                                          ajint seq1, ajint i,
                                          ajint seq2, ajint j)
{
    ajint n;
    ajint n1;
    ajint n2;
    ajint ng1;
    ajint ng2;
    ajint nd1;
    ajint nd2;
    ajint nn;
    ajint k;
    ajint x;
    ajint y;

    /* n*: set of the position itself; ng*/nd*: set used for the left
       (gauche) / right (droite) bounds, initially the same set */
    n1 = ng1 = nd1 = clos->seq[seq1].aligSetNbr[i];
    n2 = ng2 = nd2 = clos->seq[seq2].aligSetNbr[j];

    /* nothing to do when both positions are already in the same set */
    if(n1 == 0 || n2 == 0 || n1 != n2)
    {
        /* position i is in no set: take the bounds from the sets of its
           neighbouring aligned positions, when such neighbours exist */
        if(ng1 == 0)
        {
            k = clos->seq[seq1].predAligSetPos[i];
            if(k > 0)
                ng1 = clos->seq[seq1].aligSetNbr[k];
            k = clos->seq[seq1].succAligSetPos[i];
            if(k > 0)
                nd1 = clos->seq[seq1].aligSetNbr[k];
        }

        /* same for position j of seq2 */
        if(ng2 == 0)
        {
            k = clos->seq[seq2].predAligSetPos[j];
            if(k > 0)
                ng2 = clos->seq[seq2].aligSetNbr[k];
            k = clos->seq[seq2].succAligSetPos[j];
            if(k > 0)
                nd2 = clos->seq[seq2].aligSetNbr[k];
        }

        /* left bounds seen from (seq1,i): 0 when no set is available */
        if(ng1 == 0)
            for(x=0; x < clos->seqNbr; x++)
                clos->gauche1[x] = 0;
        else
            for(x=0; x < clos->seqNbr; x++)
                clos->gauche1[x] = clos->predFrontier[ng1][x];

        /* right bounds seen from (seq1,i): length + 1 when no set */
        if(nd1 == 0)
            for(x=0; x < clos->seqNbr; x++)
                clos->droite1[x] = clos->seq[x].longueur + 1;
        else
            for(x=0; x < clos->seqNbr; x++)
                clos->droite1[x] = clos->succFrontier[nd1][x];

        /* left bounds seen from (seq2,j) */
        if(ng2 == 0)
            for(x=0; x < clos->seqNbr; x++)
                clos->gauche2[x] = 0;
        else
            for(x=0; x < clos->seqNbr; x++)
                clos->gauche2[x] = clos->predFrontier[ng2][x];

        /* right bounds seen from (seq2,j) */
        if(nd2 == 0)
            for(x=0; x < clos->seqNbr; x++)
                clos->droite2[x] = clos->seq[x].longueur + 1;
        else
            for(x=0; x < clos->seqNbr; x++)
                clos->droite2[x] = clos->succFrontier[nd2][x];

        /* in its own sequence each position bounds itself */
        clos->gauche1[seq1] = clos->droite1[seq1] = i;
        clos->gauche2[seq2] = clos->droite2[seq2] = j;

        /* the merged set is built in the slot just past the last set */
        nn = clos->nbrAligSets + 1;

        for(x=0; x < clos->seqNbr; x++)
        {
            /* member position in sequence x: taken from set n1 when it
               has one, otherwise from set n2, otherwise 0 */
            clos->aligSet[nn].pos[x] = 0;
            if(n1 > 0 && clos->aligSet[n1].pos[x] > 0)
                clos->aligSet[nn].pos[x] = clos->aligSet[n1].pos[x];
            else
            {
                if(n2 > 0 && clos->aligSet[n2].pos[x] > 0)
                    clos->aligSet[nn].pos[x] = clos->aligSet[n2].pos[x];
            }

            /* no member in x: combine both sides' bounds (larger left
               bound, smaller right bound); otherwise the member position
               is both frontiers */
            if (clos->aligSet[nn].pos[x] == 0)
            {
                clos->predFrontier[nn][x] = edmax(clos->gauche1[x],
                                                  clos->gauche2[x]);
                clos->succFrontier[nn][x] = edmin(clos->droite1[x],
                                                  clos->droite2[x]);
            }
            else
                clos->predFrontier[nn][x] = clos->succFrontier[nn][x] =
                    clos->aligSet[nn].pos[x];
        }

        /* the two positions being aligned are members of the new set */
        clos->predFrontier[nn][seq1] = clos->succFrontier[nn][seq1] =
            clos->aligSet[nn].pos[seq1] = i;

        clos->predFrontier[nn][seq2] = clos->succFrontier[nn][seq2] =
            clos->aligSet[nn].pos[seq2] = j;


        /* point every member position at the new set and count members */
        for(x=clos->aligSet[nn].nbr=0; x < clos->seqNbr; x++)
            if(clos->aligSet[nn].pos[x] > 0)
            {
                k = clos->aligSet[nn].pos[x];
                clos->seq[x].aligSetNbr[k] = nn;
                clos->aligSet[nn].nbr++;
            }

        /* Pass 1 (search): for sequences x whose two right bounds
           differed, walk the aligned positions of x that follow the new
           set and remember in pos_[x][y] the last one whose predFrontier
           towards y is smaller than that of the new set */
        for(x=0; x < clos->seqNbr; x++)
            if(clos->droite1[x] != clos->droite2[x])
                /* => the left frontier may change */
                for(y=0; y < clos->seqNbr; y++)
                {
                    clos->pos_[x][y] = 0;
                    k = clos->succFrontier[nn][x];
                    if(k == clos->aligSet[nn].pos[x])
                        k = clos->seq[x].succAligSetPos[k];
                    if(k <= clos->seq[x].longueur)
                        while(k > 0)
                        {
                            n = clos->seq[x].aligSetNbr[k];
                            if(clos->predFrontier[n][y] <
                               clos->predFrontier[nn][y])
                            {
                                clos->pos_[x][y] = k;
                                k = clos->seq[x].succAligSetPos[k];
                            }
                            else
                                k = 0;
                        }
                }

        /* Pass 2 (update): raise predFrontier towards y to the new set's
           value for every set met up to the position found in pass 1 */
        for(x=0; x < clos->seqNbr; x++)
            if(clos->droite1[x] != clos->droite2[x])
                /* => the left frontier may change */
                for(y=0; y < clos->seqNbr; y++)
                {
                    k = clos->succFrontier[nn][x];
                    if(k == clos->aligSet[nn].pos[x])
                        k = clos->seq[x].succAligSetPos[k];
                    if(clos->pos_[x][y] > 0)
                        while(k > 0 && k <= clos->pos_[x][y])
                        {
                            n = clos->seq[x].aligSetNbr[k];
                            clos->predFrontier[n][y] =
                                clos->predFrontier[nn][y];
                            k = clos->seq[x].succAligSetPos[k];
                        }
                }

        /* Pass 3 (search): mirror image of pass 1, walking backwards
           through predAligSetPos and comparing succFrontier values */
        for(x=0; x < clos->seqNbr; x++)
            if(clos->gauche1[x] != clos->gauche2[x])
                /* => the right frontier may change */
                for(y=0; y < clos->seqNbr; y++)
                {
                    clos->pos_[x][y] = 0;
                    k = clos->predFrontier[nn][x];
                    if(k > 0 && k == clos->aligSet[nn].pos[x])
                        k = clos->seq[x].predAligSetPos[k];
                    while(k > 0)
                    {
                        n = clos->seq[x].aligSetNbr[k];
                        if(clos->succFrontier[n][y] >
                           clos->succFrontier[nn][y])
                        {
                            clos->pos_[x][y] = k;
                            k = clos->seq[x].predAligSetPos[k];
                        }
                        else
                            k = 0;
                    }
                }

        /* Pass 4 (update): lower succFrontier towards y to the new set's
           value for every set met down to the position found in pass 3 */
        for(x=0; x < clos->seqNbr; x++)
            if(clos->gauche1[x] != clos->gauche2[x])
                /* => the right frontier may change */
                for(y=0; y < clos->seqNbr; y++)
                {
                    k = clos->predFrontier[nn][x];
                    if(k > 0 && k == clos->aligSet[nn].pos[x])
                        k = clos->seq[x].predAligSetPos[k];
                    if(clos->pos_[x][y] > 0)
                        while(k >= clos->pos_[x][y])
                        {
                            n = clos->seq[x].aligSetNbr[k];
                            clos->succFrontier[n][y] =
                                clos->succFrontier[nn][y];
                            k = clos->seq[x].predAligSetPos[k];
                        }
                }

        /* position i was in no set before: it becomes the successor of
           the unaligned run to its left (and of the aligned position that
           ends that run) and the predecessor of the run to its right */
        if(n1 == 0)
        {
            for(k=i-1; k > 0 && clos->seq[seq1].aligSetNbr[k] == 0; k--)
                clos->seq[seq1].succAligSetPos[k] = i;
            if(k > 0)
                clos->seq[seq1].succAligSetPos[k] = i;
            for(k=i+1; k <= clos->seq[seq1].longueur
                    && clos->seq[seq1].aligSetNbr[k] == 0; k++)
                clos->seq[seq1].predAligSetPos[k] = i;
            if(k <= clos->seq[seq1].longueur)
                clos->seq[seq1].predAligSetPos[k] = i;
        }

        /* same relinking for position j of seq2 */
        if(n2 == 0)
        {
            for(k=j-1; k > 0 && clos->seq[seq2].aligSetNbr[k] == 0; k--)
                clos->seq[seq2].succAligSetPos[k] = j;
            if(k > 0)
                clos->seq[seq2].succAligSetPos[k] = j;
            for(k=j+1; k <= clos->seq[seq2].longueur
                    && clos->seq[seq2].aligSetNbr[k] == 0; k++)
                clos->seq[seq2].predAligSetPos[k] = j;
            if(k <= clos->seq[seq2].longueur)
                clos->seq[seq2].predAligSetPos[k] = j;
        }


        /* order the two old set numbers so that n1 <= n2 */
        if (n1 > n2)
        {
            n = n1;
            n1 = n2;
            n2 = n;
        }

        if (n2 == 0)
        {
            /* neither position had a set: slot nn becomes a real set */
            clos->nbrAligSets++;

            edialign_realloc_closure(clos);
        }
        else
        {
            if(n1 == 0)
            {
                /* exactly one old set: presumably the merged set nn is
                   moved into that set's slot -- NOTE(review): confirm the
                   argument order of edialign_moveAligSet */
                edialign_moveAligSet(clos, n2, nn);
            }
            else
            {
                /* two old sets collapse into one: n1 receives the merged
                   set, the slot n2 is refilled from the last set (unless
                   n2 is the last one) and the set count drops by one */
                edialign_moveAligSet(clos, n1, nn);

                if(n2 < clos->nbrAligSets)
                    edialign_moveAligSet(clos, n2, clos->nbrAligSets);
                clos->nbrAligSets--;

                edialign_realloc_closure(clos);
            }
        }
    }

    return 0;
}
2773
2774
2775
2776
2777 /* @funcstatic edialign_path ************************************************
2778 **
2779 ** edialign_path
2780 **
2781 ** @param [w] clos [edialignCLOSURE*] Undocumented
2782 ** @param [r] x [ajint] Undocumented
2783 ** @param [r] i [ajint] Undocumented
2784 ** @param [r] y [ajint] Undocumented
2785 ** @param [r] j [ajint] Undocumented
2786 ** @return [ajint] Undocumented
2787 *****************************************************************************/
2788
edialign_path(edialignCLOSURE * clos,ajint x,ajint i,ajint y,ajint j)2789 static ajint edialign_path(edialignCLOSURE *clos,
2790 ajint x, ajint i, ajint y, ajint j)
2791 {
2792 ajint n2;
2793 ajint k;
2794
2795 if(x == y)
2796 return(i <= j);
2797
2798 n2 = clos->seq[y].aligSetNbr[j];
2799
2800 if(n2 == 0)
2801 {
2802 k = clos->seq[y].predAligSetPos[j];
2803 if(k > 0)
2804 n2 = clos->seq[y].aligSetNbr[k];
2805 }
2806
2807 if(n2 == 0)
2808 return(0);
2809
2810 return(i <= clos->predFrontier[n2][x]);
2811 }
2812
2813
2814
2815
2816 /* @funcstatic edialign_alignedPositions ************************************
2817 **
2818 ** edialign_alignedPositions
2819 **
2820 ** @param [w] clos [edialignCLOSURE*] Undocumented
2821 ** @param [r] x [ajint] Undocumented
2822 ** @param [r] i [ajint] Undocumented
2823 ** @param [r] y [ajint] Undocumented
2824 ** @param [r] j [ajint] Undocumented
2825 ** @return [ajint] Undocumented
2826 *****************************************************************************/
2827
edialign_alignedPositions(edialignCLOSURE * clos,ajint x,ajint i,ajint y,ajint j)2828 static ajint edialign_alignedPositions(edialignCLOSURE *clos,
2829 ajint x, ajint i,
2830 ajint y, ajint j)
2831 {
2832
2833 return (x == y && i == j) || (clos->seq[x].aligSetNbr[i] != 0 &&
2834 clos->seq[x].aligSetNbr[i] ==
2835 clos->seq[y].aligSetNbr[j]);
2836 }
2837
2838
2839
2840
2841 /* @funcstatic edialign_alignablePositions ************************************
2842 **
2843 ** edialign_alignablePositions
2844 **
2845 ** @param [w] clos [edialignCLOSURE*] Undocumented
2846 ** @param [r] x [ajint] Undocumented
2847 ** @param [r] i [ajint] Undocumented
2848 ** @param [r] y [ajint] Undocumented
2849 ** @param [r] j [ajint] Undocumented
2850 ** @return [ajint] Undocumented
2851 *****************************************************************************/
2852
edialign_alignablePositions(edialignCLOSURE * clos,ajint x,ajint i,ajint y,ajint j)2853 static ajint edialign_alignablePositions(edialignCLOSURE *clos,
2854 ajint x, ajint i,
2855 ajint y, ajint j)
2856 {
2857
2858 if(edialign_path(clos, x, i, y, j))
2859 return(edialign_path(clos, y, j, x, i));
2860
2861 return(!edialign_path(clos, y, j, x, i));
2862 }
2863
2864
2865
2866
2867 /* @funcstatic edialign_addAlignedSegments **********************************
2868 **
2869 ** edialign_addAlignedSegments
2870 **
2871 ** @param [w] clos [edialignCLOSURE*] Undocumented
2872 ** @param [r] x [ajint] Undocumented
2873 ** @param [r] i [ajint] Undocumented
2874 ** @param [r] y [ajint] Undocumented
2875 ** @param [r] j [ajint] Undocumented
2876 ** @param [r] l [ajint] Undocumented
2877 ** @return [ajint] Undocumented
2878 *****************************************************************************/
2879
edialign_addAlignedSegments(edialignCLOSURE * clos,ajint x,ajint i,ajint y,ajint j,ajint l)2880 static ajint edialign_addAlignedSegments(edialignCLOSURE *clos,
2881 ajint x, ajint i,
2882 ajint y, ajint j, ajint l)
2883 {
2884 ajint k;
2885
2886 for(k=0; k < l; i++, j++, k++)
2887 edialign_addAlignedPositions(clos, x, i, y, j);
2888
2889 return 0;
2890 }
2891
2892
2893
2894
2895 /* @funcstatic edialign_alignableSegments **********************************
2896 **
2897 ** edialign_alignableSegments
2898 **
2899 ** @param [w] clos [edialignCLOSURE*] Undocumented
2900 ** @param [r] x [ajint] Undocumented
2901 ** @param [r] i [ajint] Undocumented
2902 ** @param [r] y [ajint] Undocumented
2903 ** @param [r] j [ajint] Undocumented
2904 ** @param [r] l [ajint] Undocumented
2905 ** @return [ajint] Undocumented
2906 *****************************************************************************/
2907
edialign_alignableSegments(edialignCLOSURE * clos,ajint x,ajint i,ajint y,ajint j,ajint l)2908 static ajint edialign_alignableSegments(edialignCLOSURE *clos,
2909 ajint x, ajint i,
2910 ajint y, ajint j, ajint l)
2911 {
2912 ajint k;
2913
2914 for(k=0; k < l && edialign_alignablePositions(clos, x, i, y, j); i++, j++,
2915 k++);
2916
2917 return(k==l);
2918 }
2919
2920
2921
2922
#if 0
/* @funcstatic edialign_alignedSegments **********************************
**
** Checks the position pairs (i,j), (i+1,j+1), ... of two segments of
** length l with edialign_alignedPositions, stopping at the first pair
** that fails. Compiled out (unused).
**
** @param [w] clos [edialignCLOSURE*] Closure holding the set numbers
** @param [r] x [ajint] Index of first sequence
** @param [r] i [ajint] Start position in first sequence
** @param [r] y [ajint] Index of second sequence
** @param [r] j [ajint] Start position in second sequence
** @param [r] l [ajint] Segment length
** @return [ajint] 1 if exactly l pairs were accepted, 0 otherwise
*****************************************************************************/

static ajint edialign_alignedSegments(edialignCLOSURE *clos,
                                      ajint x, ajint i, ajint y,
                                      ajint j, ajint l)
{
    ajint accepted = 0;

    while(accepted < l)
    {
        if(!edialign_alignedPositions(clos, x, i + accepted,
                                      y, j + accepted))
            break;

        accepted++;
    }

    return (accepted == l);
}
#endif
2949
2950
2951
2952
/* we assume that x != y */
2954
2955
2956
2957
2958 /* @funcstatic edialign_predFrontier **********************************
2959 **
2960 ** edialign_predFrontier
2961 **
2962 ** @param [w] clos [edialignCLOSURE*] Undocumented
2963 ** @param [r] x [ajint] Undocumented
2964 ** @param [r] i [ajint] Undocumented
2965 ** @param [r] y [ajint] Undocumented
2966 ** @return [ajint] Undocumented
2967 *****************************************************************************/
2968
edialign_predFrontier(edialignCLOSURE * clos,ajint x,ajint i,ajint y)2969 static ajint edialign_predFrontier(edialignCLOSURE *clos,
2970 ajint x, ajint i, ajint y)
2971 {
2972 ajint n;
2973 ajint k;
2974
2975 n = clos->seq[x].aligSetNbr[i];
2976
2977 if(n == 0)
2978 {
2979 k = clos->seq[x].predAligSetPos[i];
2980 if(k > 0)
2981 n = clos->seq[x].aligSetNbr[k];
2982 }
2983
2984 if(n > 0)
2985 return(clos->predFrontier[n][y]);
2986
2987 return(0);
2988 }
2989
2990
2991
2992
/* we assume that x != y */
2994
2995
2996
2997
2998 /* @funcstatic edialign_succFrontier ****************************************
2999 **
3000 ** edialign_succFrontier
3001 **
3002 ** @param [w] clos [edialignCLOSURE*] Undocumented
3003 ** @param [r] x [ajint] Undocumented
3004 ** @param [r] i [ajint] Undocumented
3005 ** @param [r] y [ajint] Undocumented
3006 ** @return [ajint] Undocumented
3007 *****************************************************************************/
3008
edialign_succFrontier(edialignCLOSURE * clos,ajint x,ajint i,ajint y)3009 static ajint edialign_succFrontier(edialignCLOSURE *clos,
3010 ajint x, ajint i, ajint y)
3011 {
3012 ajint n;
3013 ajint k;
3014
3015 n = clos->seq[x].aligSetNbr[i];
3016
3017 if(n == 0)
3018 {
3019 k = clos->seq[x].succAligSetPos[i];
3020 if(k > 0)
3021 n = clos->seq[x].aligSetNbr[k];
3022 }
3023
3024 if(n > 0)
3025 return(clos->succFrontier[n][y]);
3026
3027 return(clos->seq[y].longueur+1);
3028 }
3029
3030
3031
3032
3033 /* @funcstatic edialign_anchor_check ****************************************
3034 **
3035 ** edialign_anchor_check
3036 **
3037 ** @param [r] s1 [ajint] Undocumented
3038 ** @param [r] s2 [ajint] Undocumented
3039 ** @param [r] b1 [ajint] Undocumented
3040 ** @param [r] b2 [ajint] Undocumented
3041 ** @param [r] l [ajint] Undocumented
3042 ** @param [r] scr [float] Undocumented
3043 ** @return [void]
3044 *****************************************************************************/
3045
edialign_anchor_check(ajint s1,ajint s2,ajint b1,ajint b2,ajint l,float scr)3046 static void edialign_anchor_check(ajint s1, ajint s2, ajint b1, ajint b2,
3047 ajint l, float scr)
3048 {
3049 (void) scr; /* make it used */
3050 if(
3051 ( s1 < 1 ) ||
3052 ( s1 > (ajint) seqnum )
3053 )
3054 {
3055 ajFatal(" \n\n wrong sequence # %d in anchoring file\n\n"
3056 " data set consists only of %u sequences \n\n",
3057 s1,seqnum);
3058 }
3059
3060 if(
3061 ( s2 < 1 ) ||
3062 ( s2 > (ajint) seqnum )
3063 )
3064 {
3065 ajFatal(" \n\n wrong sequence # %d in anchoring file\n\n"
3066 " data set consists only of %u sequences \n\n",
3067 s2,seqnum );
3068 }
3069
3070
3071 if( s1 == s2 )
3072 {
3073 ajFatal("\n strange data in anchoring file:\n"
3074 " sequence # %d anchored with itself.\n\n", s1 );
3075 }
3076
3077
3078
3079 /*
3080 if(
3081 ( b1 < 1 ) ||
3082 ( b1 + l - 1 > seqlen[ s1 - 1 ] )
3083 ) {
3084 printf(" \n\n anchor # %d starts", anc_num + 1 ) ;
3085 printf(" at position %d in sequence %d and has a length of %d.\n",
3086 b1, s1, l ) ;
3087 printf(" This does not fit into sequence # %d " , s1 );
3088 printf(" (sequence length = %d) \n\n", seqlen[ s1 - 1 ] ) ;
3089 printf(" PROGRAM TERMINATED \n\n" ) ;
3090 embExitBad() ;
3091 }
3092 */
3093
3094 if(
3095 ( b1 < 1 ) ||
3096 ( b1 + l - 1 > seqlen[ s1 - 1 ] )
3097 )
3098 {
3099 ajFatal(" \n\n anchor # %d starts"
3100 " at position %d in sequence %d\n "
3101 " and is %d residues in length.\n"
3102 " However, sequence %d"
3103 " is only %d residues in length \n\n",
3104 anc_num+1,b1,s1,l,s1,seqlen[s1-1]);
3105 }
3106
3107 if(
3108 ( b2 < 1 ) ||
3109 ( b2 + l - 1 > seqlen[ s2 - 1 ] )
3110 )
3111 {
3112 ajFatal(" \n\n anchor # %d starts"
3113 " at position %d in sequence %d\n "
3114 " and is %d residues in length.\n"
3115 " However, sequence %d"
3116 " is only %d residues in length \n\n",
3117 anc_num+1,b2,s2,l,s2,seqlen[s2 - 1]);
3118 }
3119
3120 return;
3121 }
3122
3123
3124
3125
3126 /* @funcstatic edialign_multi_anc_read **************************************
3127 **
3128 ** edialign_multi_anc_read
3129 **
3130 ** @param [u] file_name [char*] File name
3131 ** @return [ajint] Undocumented
3132 *****************************************************************************/
3133
edialign_multi_anc_read(char * file_name)3134 static ajint edialign_multi_anc_read(char *file_name)
3135 {
3136 char anc_file_name[NAME_LEN];
3137 FILE *fp;
3138 struct multi_frag *current_frg ;
3139 char line[10000];
3140 ajint len;
3141 ajint beg1;
3142 ajint beg2;
3143 ajint seq1;
3144 ajint seq2;
3145 float wgt;
3146
3147 strcpy( anc_file_name , file_name );
3148 strcat( anc_file_name , ".anc" );
3149
3150 if((fp = fopen( anc_file_name, "r")) == NULL)
3151 edialign_erreur("\n\n cannot find file with anchor points \n\n\n");
3152
3153 if(( anchor_frg = ( struct multi_frag * ) calloc( 1 ,
3154 sizeof( struct multi_frag ) ))
3155 == NULL) {
3156 printf(" problems with memory allocation "
3157 "for `anchor fragments' ! \n \n");
3158 embExitBad();
3159 }
3160
3161 current_frg = anchor_frg ;
3162
3163
3164 while(fgets(line , MLINE , fp ) != NULL )
3165 {
3166
3167 if(edialign_word_count( line ) == 6 )
3168 {
3169 sscanf(line,"%d %d %d %d %d %f ", &seq1 , &seq2 , &beg1, &beg2 ,
3170 &len , &wgt );
3171
3172 edialign_anchor_check( seq1 , seq2 , beg1, beg2 , len , wgt ) ;
3173
3174 seq1 = seq1 - 1 ;
3175 seq2 = seq2 - 1 ;
3176
3177 current_frg->s[0] = seq1 ;
3178 current_frg->s[1] = seq2 ;
3179 current_frg->b[0] = beg1 ;
3180 current_frg->b[1] = beg2 ;
3181 current_frg->ext = len ;
3182 current_frg->weight = wgt;
3183
3184
3185 current_frg->next = (struct multi_frag *)
3186 calloc( 1 , sizeof(struct multi_frag) );
3187
3188 current_frg = current_frg->next;
3189 anc_num++;
3190 }
3191 else
3192 {
3193 if(edialign_word_count( line ) != 0 )
3194 {
3195 ajFatal("\n\n Anchor file has wrong format. "
3196 "\n Each line must contain 6 numbers! \n"
3197 "\n Anchor file contains line \n\n"
3198 " %s \n", line);
3199 }
3200 }
3201 }
3202
3203 return 0;
3204 }
3205
3206
3207
3208
3209 /* @funcstatic edialign_frag_chain ******************************************
3210 **
3211 ** edialign_frag_chain
3212 **
3213 ** @param [r] n1 [ajint] Undocumented
3214 ** @param [r] n2 [ajint] Undocumented
3215 ** @param [u] fp1 [FILE*] Undocumented
3216 ** @param [u] fp_m [FILE*] Undocumented
3217 ** @param [u] number [ajuint*] Undocumented
3218 ** @return [float] Undocumented
3219 *****************************************************************************/
3220
edialign_frag_chain(ajint n1,ajint n2,FILE * fp1,FILE * fp_m,ajuint * number)3221 static float edialign_frag_chain(ajint n1, ajint n2, FILE *fp1, FILE *fp_m,
3222 ajuint *number)
3223 {
3224 /* pairwise alignment */
3225
3226
3227 /* `i' denotes positions in the 1. sequence ( seq[n1] ),
3228 `j' denotes positions in the 2. sequence ( seq[n2] ) */
3229
3230
3231 ajint mot_match[ MAX_DIA * 3 ];
3232
3233 ajint mot_match_num;
3234
3235 float mot_wgt_sum;
3236 float this_mot_wgt ;
3237
3238 float thr; /* threshold for the weight of fragments starting
3239 at a given point (i,j). For any new pair (i,j),
3240 thr = 0. However, if a fragment with positive weight w
3241 is found starting at (i,j), thr is defined to be w
3242 and any further fragment starting at (i,j) is
3243 taken into consideration, only if its weight excedes
3244 thr. This is, because it is not meaningful to consider
3245 a fragment containing another fragment with
3246 higher weight. */
3247 ajint i;
3248 ajint j;
3249 ajint k;
3250 ajint diff1;
3251 ajint diff2;
3252 ajint hv;
3253 ajint numsubseq;
3254 ajint ende2; /* denote the last position considered in the 2nd
3255 sequence. Coincides with seqlen[n2], respectively,
3256 exept if nucleotide diagonals are translated into
3257 peptide diagonals. In this case,
3258 ende2 = seqlen[n2]-2 */
3259 ajint start_a ; /* diagonals begining at a position (i,j) are only
3260 considered if the similarity-value at (i,j)
3261 exceeds a certain threshold, respectively if
3262 seq[n1][i] = seq[n2][j]. In this case the value
3263 of `start_a' is 1, otherwise the value is 0 */
3264 ajint start_pep = 0;
3265 ajint start_pep_c = 0;
3266 ajint start_dna = 0;
3267 ajint start_dna1;
3268 ajint trpl_start;
3269 ajint trans;
3270 ajint crick_wgt = 0;
3271 ajint match_p;
3272 ajint match_p_c;
3273 ajint match_d;
3274 ajint kmaxloc; /* maximum length of diagonals starting at a given
3275 position (i,j) of the dot matrix.
3276 kmaxloc = min{ max_dia,seqlen[n1]-i+1 ,seqlen[n2]-j+1} */
3277 ajint lmax_real ;
3278 ajint mnum = 0; /* number of current diagonal */
3279 ajint *ub_int;
3280 ajint *lb_int;
3281 ajint limit; /* min { ub_int[i] ; ende2 } */
3282 ajint bound_test; /* = 1 , if diagonal under consideration is consistent
3283 with ub_int and lb_int.
3284 = 0 , if not. */
3285 ajint max_nd = 0;
3286 ajint new_region = 0;
3287 ajint current_nd = 0;
3288 short accepted;
3289 char ch;
3290
3291 float total_sum;
3292 float wgt_k_match;
3293
3294 float thr2;
3295 float mot_wgt;
3296
3297 struct pair_frag **diap; /* diap[i] = pointer to last diagonal ending
3298 in the (i-1)-th column */
3299 struct pair_frag **prec_vec; /* prec_vec[j] = pointer to diagonal with
3300 maximum sum of weights accumulated
3301 at a given position (i,j) */
3302 struct pair_frag *current_dia, *hp, *cp, *cd;
3303
3304 FILE *fp_st;
3305 FILE *nd_fp;
3306
3307
3308 (void) fp_m; /* make it used */
3309
3310 /*
3311 printf( "\n in frag_chain: iter = %d wgt_type = %d \n\n", istep ,
3312 wgt_type );
3313 printf( "\n in frag_chain: iter = %d wgt_dna 20 = %f \n\n", istep ,
3314 wgt_dna[ 20 ][ 20 ] );
3315 */
3316
3317 if(print_status)
3318 if(seqnum > 20)
3319 {
3320 fp_st = fopen( pst_name ,"w");
3321
3322 fprintf(fp_st,"\n\n\n Status of the program run:\n");
3323 fprintf(fp_st," ==========================\n\n");
3324 fprintf(fp_st," %s \n\n", input_line);
3325 fprintf(fp_st," iteration step %d in multiple alignment\n\n",
3326 istep );
3327 fprintf(fp_st," aligning seq %d /", n1 + 1 );
3328 fprintf(fp_st," seq %d\n", n2 + 1);
3329 fprintf(fp_st," total number of");
3330 fprintf(fp_st," sequences: %u\n\n", seqnum);
3331 fprintf(fp_st,"\n\n\n" );
3332
3333 fclose(fp_st);
3334 }
3335
3336
3337
3338
3339 if((ub_int = (int *) calloc( ( seqlen[n1] + 3 ) , sizeof(int) ) ) == NULL)
3340 {
3341 printf("problems with memory allocation for ub_int! \n \n");
3342 embExitBad();
3343 }
3344
3345 if((lb_int = (int *) calloc( (seqlen[n1]+3) , sizeof(int) ) ) == NULL)
3346 {
3347 printf("problems with memory allocation for lb_int! \n \n");
3348 embExitBad();
3349 }
3350
3351 if((prec_vec = (struct pair_frag **)
3352 calloc( (seqlen[n2]+3) , sizeof(struct pair_frag *) ) ) == NULL)
3353 {
3354 printf("problems with memory allocation for prec_vec! \n \n");
3355 embExitBad();
3356 }
3357
3358 if(
3359 (diap = (struct pair_frag **) calloc( (seqlen[n1] + 3) ,
3360 sizeof(struct pair_frag *) )) ==
3361 NULL
3362 )
3363 {
3364 printf("\n \n \n ATTENTION: \n \n \n");
3365 printf(" problems with memory allocation\n");
3366 printf(" for diagonals! \n");
3367 embExitBad();
3368 }
3369
3370
3371 for(i = 1; i<= seqlen[n1]; i++)
3372 diap[i] = NULL;
3373
3374
3375 if( (diap[0] = (struct pair_frag *)
3376 calloc( 1 , sizeof(struct pair_frag) ) ) == NULL)
3377 {
3378 printf("problems with memory allocation for diap! \n \n");
3379 embExitBad();
3380 }
3381
3382 for( j = 1 ; j< seqlen[n2]+3 ; j++ )
3383 prec_vec[j] = diap[0];
3384
3385
3386 if( dia_pa_file )
3387 fp_dpa = fopen( dia_pa_name , "a" );
3388
3389 ende2 = seqlen[n2];
3390
3391 /* Calculation of rel_weight */
3392
3393
3394 if( iter_cond_prob == 0 )
3395 {
3396 if( wgt_type == 0 )
3397 edialign_rel_wgt_calc( seqlen[n1] , seqlen[n2] , wgt_prot );
3398 if( wgt_type % 2 )
3399 edialign_rel_wgt_calc( seqlen[n1] , seqlen[n2] , wgt_dna);
3400 if( wgt_type > 1 )
3401 edialign_rel_wgt_calc( seqlen[n1] , seqlen[n2] , wgt_trans);
3402
3403 if( istep == 1 )
3404 if( wgt_print || wgt_print_x ) {
3405 edialign_wgt_prnt( ) ;
3406 if( wgt_print_x )
3407 embExitBad() ;
3408 }
3409
3410 } /* if( iter_cond_prob == 0 ) */
3411
3412
3413
3414
3415 for( hv = 1 ; hv <= seqlen[ n1 ] ; hv++ )
3416 {
3417 lb_int[ hv ] = edialign_predFrontier( gabiosclos , n1 , hv , n2 );
3418 ub_int[ hv ] = edialign_succFrontier( gabiosclos , n1 , hv , n2 );
3419 if (lb_int[ hv ] != ub_int[ hv ])
3420 {
3421 lb_int[ hv ]++;
3422 ub_int[ hv ]--;
3423 }
3424 }
3425
3426 mnum = 0;
3427
3428 if( iter_cond_prob || ( istep == 1 ) )
3429 new_region = 1;
3430
3431 /* DP START */
3432
3433 for( i = 1 ; i <= seqlen[n1] ; i++ )
3434 {
3435
3436 if( open_pos[n1][n2][i] )
3437 {
3438 if( new_region )
3439 {
3440
3441 diff2 = ( edialign_succFrontier(gabiosclos, n1, i , n2)
3442 - edialign_predFrontier(gabiosclos, n1, i , n2) -1 );
3443
3444 if ( diff2 < 0 )
3445 diff2 = 0;
3446
3447 diff1 = ( edialign_succFrontier(gabiosclos, n2, lb_int[i] , n1)
3448 - edialign_predFrontier(gabiosclos, n2, lb_int[i] , n1)
3449 -1 ) ;
3450 if ( diff1 < 0 )
3451 diff1 = 0;
3452
3453 /*
3454 printf(" new region, i = %d diff = %d , %d \n", i, diff1 ,
3455 diff2 );
3456 */
3457
3458
3459 if( iter_cond_prob )
3460 if( ( diff1 > 0 ) && ( diff2 > 0 ) ) {
3461 if( wgt_type == 0 )
3462 edialign_rel_wgt_calc( diff1 , diff2 , wgt_prot );
3463 if( wgt_type % 2 )
3464 edialign_rel_wgt_calc( diff1 , diff2 , wgt_dna );
3465 if( wgt_type > 1 )
3466 edialign_rel_wgt_calc( diff1 , diff2 , wgt_trans );
3467 }
3468
3469 }
3470
3471 limit = edialign_mini2( ub_int[i] , ende2 );
3472 for( j = lb_int[i] ; j <= limit ; j++ )
3473 {
3474
3475 if( wgt_type != 1 )
3476 start_pep = ( sim_score[ amino[n1][i] ][ amino[n2][j] ]
3477 >= thr_sim_score );
3478
3479 if( crick_strand )
3480 start_pep_c = (sim_score[amino_c[n1][i]][amino_c[n2][j]]
3481 >= thr_sim_score );
3482
3483 if( wgt_type % 2 )
3484 {
3485 if( strict )
3486 start_dna = ( (seq[n1][i] == seq[n2][j]) &&
3487 ( seq[n1][i] == 'A' ||
3488 seq[n1][i] == 'C' ||
3489 seq[n1][i] == 'T' ||
3490 seq[n1][i] == 'G' ||
3491 seq[n1][i] == 'U' ) );
3492
3493 else
3494 start_dna = (seq[n1][i] == seq[n2][j]);
3495
3496 if( dna_speed )
3497 if( ( i < seqlen[n1] ) && ( j < limit ) )
3498 {
3499 if( strict )
3500 start_dna1 = ((seq[n1][i + 1] == seq[n2][j+1])
3501 &&
3502 (seq[n1][ i + 1 ] == 'A' ||
3503 seq[n1][ i + 1 ] == 'C' ||
3504 seq[n1][ i + 1 ] == 'T' ||
3505 seq[n1][ i + 1 ] == 'G' ||
3506 seq[n1][ i + 1 ] == 'U' ) );
3507
3508 else
3509 start_dna1 = (seq[n1][i+1] == seq[n2][j+1]);
3510 start_dna = start_dna * start_dna1 ;
3511 }
3512
3513 }
3514
3515 if( wgt_type != 1 )
3516 start_a = start_pep ;
3517 else
3518 start_a = start_dna ;
3519
3520 if( wgt_type == 3 )
3521 start_a = start_pep + start_dna ;
3522
3523 if( crick_strand )
3524 start_a = start_a + start_pep_c ;
3525
3526 if( self_comparison )
3527 if( i == j )
3528 start_a = 0 ;
3529
3530 if( exclude_frg )
3531 if( j == gl_exclude_list[ n1 ][ n2 ][ i ] )
3532 start_a = 0 ;
3533
3534 if( start_a )
3535 {
3536
3537 /*match = 0;*/
3538 match_d = 0;
3539 match_p = 0;
3540 match_p_c = 0;
3541 thr = 0;
3542 /*
3543 start_count++ ;
3544 */
3545 bound_test = 1;
3546
3547 if( wgt_type > 1 )
3548 lmax_real = lmax * 3 ;
3549 else
3550 lmax_real = lmax ;
3551
3552 kmaxloc =
3553 edialign_mini3(lmax_real , seqlen[n1]-i+1 ,
3554 seqlen[n2]-j+1 );
3555
3556 if( motifs )
3557 {
3558 for( k = 1 ; k <= kmaxloc ; k++ )
3559 if( ( mot_pos[ n1 ][ i + k - 1 ] == 1 ) &&
3560 ( mot_pos[ n2 ][ j + k - 1 ] == 1 ) ) {
3561 mot_match[ k ] = 1 ;
3562 /* printf(" match in %d %d %d \n",
3563 i, j, k ); */
3564 }
3565 else
3566 mot_match[ k ] = 0 ;
3567 }
3568
3569 /*******************\
3570 * *
3571 * fragments start *
3572 * *
3573 \*******************/
3574
3575 k = 1;
3576 mot_match_num = 0;
3577 mot_wgt_sum = 0 ;
3578
3579 while( ( k <= kmaxloc ) && start_a )
3580 {
3581 if( motifs )
3582 {
3583 if((( i- j)*(i-j)) <
3584 (max_mot_offset * max_mot_offset))
3585 if( k >= mot_len )
3586 if( mot_match[ k - mot_len + 1] )
3587 {
3588 /*mot_offset = ( i - j ) ;*/
3589 this_mot_wgt = edialign_mot_dist_factor
3590 ((i-j),
3591 mot_offset_factor);
3592
3593 /*
3594 printf(" i - j = %d , tmw = %f \n",
3595 i - j , this_mot_wgt );
3596 */
3597 mot_wgt_sum = mot_wgt_sum +
3598 this_mot_wgt;
3599 mot_match_num++ ;
3600
3601 }
3602 }
3603
3604 if( open_pos[n1][n2][ i + k - 1 ] )
3605 {
3606 bound_test = bound_test *
3607 ( j + k - 1 >= lb_int[ i + k - 1 ] );
3608 bound_test = bound_test *
3609 ( j + k - 1 <= ub_int[ i + k - 1 ] );
3610
3611 trpl_start = 0;
3612
3613 if( wgt_type < 2 )
3614 trans = 0 ;
3615 else
3616 trans = 1 ;
3617
3618 if( start_pep ||
3619 ( crick_strand && start_pep_c )
3620 )
3621 if((wgt_type > 1 ) && ((k % 3) == 1))
3622 {
3623 trpl_start = 1 ;
3624
3625 trpl_start = trpl_start *
3626 ( j + k >= lb_int[ i + k ] );
3627 trpl_start = trpl_start *
3628 ( j + k <= ub_int[ i + k ] );
3629 trpl_start = trpl_start *
3630 open_pos[ n1 ][ n2 ][ i + k ] ;
3631
3632
3633 trpl_start = trpl_start *
3634 ( j + k + 1 >= lb_int[ i + k + 1 ] );
3635 trpl_start = trpl_start *
3636 ( j + k + 1 <= ub_int[ i + k + 1 ] );
3637 trpl_start = trpl_start *
3638 open_pos[ n1 ][ n2 ][ i + k + 1 ] ;
3639 }
3640
3641
3642
3643 if(
3644 bound_test &&
3645 ( ( wgt_type != 2 ) || trpl_start )
3646 )
3647 {
3648 if( start_pep )
3649 if(
3650 ( wgt_type == 0 ) ||
3651 ( ( wgt_type > 1) && trpl_start )
3652 )
3653 match_p = match_p
3654 + sim_score[amino[n1][i+k-1]]
3655 [amino[n2][j+k-1]];
3656
3657 if( crick_strand )
3658 if( start_pep_c )
3659 if(
3660 ( wgt_type == 0 ) ||
3661 ( ( wgt_type > 1) && trpl_start )
3662 )
3663 match_p_c = match_p_c
3664 + sim_score[amino_c[n1][i+k-1]]
3665 [amino_c[n2][j+k-1]];
3666
3667
3668 if( start_dna )
3669 if( wgt_type % 2 )
3670 if( !strict ||
3671 (seq[n1][i+k-1] == 'A' ||
3672 seq[n1][i+k-1] == 'C' ||
3673 seq[n1][i+k-1] == 'T' ||
3674 seq[n1][i+k-1] == 'G' ||
3675 seq[n1][i+k-1] == 'U' ))
3676 match_d = match_d +
3677 (seq[n1][i+k-1] ==
3678 seq[n2][j+k-1] );
3679
3680 wgt_k_match = 0;
3681
3682
3683 if( wgt_type == 0 )
3684 wgt_k_match = wgt_prot[ k ][ match_p ];
3685 if( wgt_type == 1 )
3686 wgt_k_match = wgt_dna[ k ][ match_d ];
3687
3688 if( wgt_type > 1 )
3689 {
3690 if( start_pep )
3691 wgt_k_match = wgt_trans[(k+2) / 3]
3692 [match_p];
3693
3694 if( crick_strand )
3695 if( start_pep_c ) {
3696 if( wgt_trans[ ( k + 2 ) / 3 ]
3697 [ match_p_c ] > wgt_k_match )
3698 {
3699 wgt_k_match = wgt_trans[(k+2) /
3700 3 ][ match_p_c ] ;
3701 crick_wgt = 1 ;
3702 }
3703 else
3704 crick_wgt = 0 ;
3705 }
3706 }
3707
3708 if( start_dna )
3709 if( wgt_type == 3 )
3710 if( k <= lmax )
3711 if( wgt_dna[ k ][ match_d ]
3712 > wgt_k_match ) {
3713 wgt_k_match = wgt_dna[k]
3714 [match_d];
3715 trans = 0 ;
3716 }
3717
3718
3719
3720 if( wgt_type == 0 )
3721 if( match_p <= ( k * av_sim_score_pep ) )
3722 start_pep = 0;
3723
3724 if( wgt_type == 1 )
3725 if( match_d <= ( k * av_sim_score_nuc ) )
3726 start_dna = 0;
3727
3728 if( start_pep )
3729 if( wgt_type > 1 )
3730 if((match_p * 3 ) <=
3731 (k * av_sim_score_pep))
3732 start_pep = 0;
3733
3734 if( start_pep_c )
3735 if( wgt_type > 1 )
3736 if((match_p_c * 3 ) <=
3737 (k * av_sim_score_pep))
3738 start_pep_c = 0;
3739
3740
3741 if( wgt_type != 1 )
3742 start_a = start_pep ;
3743 else
3744 start_a = start_dna ;
3745
3746 if( wgt_type == 3 )
3747 start_a = start_pep + start_dna ;
3748
3749 if( crick_strand )
3750 start_a = start_a + start_pep_c ;
3751
3752 if( exclude_frg )
3753 if(gl_exclude_list[ n1 ][ n2 ][ i + k ] == j +
3754 k )
3755 start_a = 0 ;
3756
3757
3758 if( motifs )
3759 if( mot_wgt_sum > 0 ) {
3760 fprintf( fp_mot , " %4d %4d ",
3761 n1 + 1, n2 + 1);
3762 fprintf( fp_mot , " %4d %4d %3d ",
3763 i, j, k );
3764 fprintf( fp_mot , " %5.2f ",
3765 wgt_k_match );
3766 mot_wgt = mot_wgt_sum * mot_factor ;
3767 wgt_k_match = wgt_k_match + mot_wgt ;
3768 fprintf( fp_mot , " %2d ",
3769 mot_match_num );
3770 fprintf( fp_mot , " %5.2f \n",
3771 wgt_k_match );
3772 }
3773
3774 /*
3775 if( wgt_k_match > 0 )
3776 printf(" k = %d min_dia = %d "
3777 "wgt_k_match = %f thr = %f \n",
3778 k, min_dia , wgt_k_match , thr );
3779 */
3780 if( k >= min_dia )
3781 if( wgt_k_match > thr )
3782 {
3783 if( (current_dia = (struct pair_frag *)
3784 calloc(1 ,
3785 sizeof(struct pair_frag) ))
3786 == NULL )
3787 {
3788 printf("\n \n \n ATTENTION:"
3789 " \n \n \n");
3790 printf(" too many diagonals "
3791 "in\n");
3792 printf(" pairwise alignment "
3793 "of");
3794 printf(" sequences\n");
3795 printf(" %s and ",
3796 seq_name[n1]);
3797 printf("%s\n \n \n \n",
3798 seq_name[n2]);
3799
3800 fprintf(fp1,"\n \n ATTENTION:"
3801 "\n \n");
3802 fprintf(fp1," too many "
3803 "diagonals\n");
3804 fprintf(fp1," in pairwise "
3805 "alignment");
3806 fprintf(fp1," of sequences\n");
3807 fprintf(fp1," %s and ",
3808 seq_name[n1]);
3809 fprintf(fp1,"%s\n \n \n \n",
3810 seq_name[n2]);
3811
3812 embExitBad();
3813 }
3814
3815
3816
3817 current_dia->b1 = i;
3818 current_dia->b2 = j;
3819 current_dia->ext = k + 2 * trans ;
3820 current_dia->weight = wgt_k_match ;
3821 current_dia->trans = trans ;
3822 current_dia->cs = crick_wgt ;
3823 current_dia->sum = current_dia->weight
3824 + (prec_vec[j])->sum ;
3825 current_dia->prec = prec_vec[j];
3826 current_dia->last
3827 = diap[ i + (current_dia->ext)];
3828 diap[i+(current_dia->ext)] =
3829 current_dia;
3830
3831 mnum++;
3832
3833 if(print_max_nd)
3834 {
3835 current_nd++ ;
3836 if( current_nd > max_nd )
3837 max_nd = current_nd;
3838 }
3839
3840 dia_num++;
3841 if(afc_file)
3842 {
3843 fprintf(fp_dia,"FRG %d ",
3844 dia_num );
3845 fprintf(fp_dia,"name: %s ",
3846 seq_name[ n1 ]);
3847 fprintf(fp_dia," %s ",
3848 seq_name[ n2 ]);
3849 if( seqnum > 2 )
3850 {
3851 fprintf(fp_dia," seq: %d",
3852 n1 + 1 );
3853 fprintf(fp_dia," %d" ,
3854 n2 + 1 );
3855 }
3856 fprintf(fp_dia," beg: %d %d",
3857 i, j );
3858 fprintf(fp_dia," len: %d",
3859 current_dia->ext );
3860 fprintf(fp_dia," wgt: %6.3f",
3861 current_dia->weight );
3862
3863 /*
3864 if( BETA )
3865 if( iter_cond_prob )
3866 {
3867 fprintf(fp_dia," d1 = %d "
3868 "d2 = %d ", diff1, diff2 );
3869 }
3870 */
3871
3872 fprintf(fp_dia," it = %d ",
3873 istep );
3874 if( ( wgt_type == 3 ) ||
3875 crick_strand )
3876 {
3877 if( current_dia->trans )
3878 fprintf(fp_dia," P-frg" );
3879 else
3880 fprintf(fp_dia," N-frg" );
3881 }
3882
3883 fprintf(fp_dia,"\n");
3884 if( afc_filex ) {
3885 fprintf(fp_dia,"SEG1 ");
3886 for(hv = 0;hv <
3887 current_dia->ext ; hv++)
3888 {
3889 ch = seq[n1][ i + hv ];
3890 fprintf(fp_dia,"%c" , ch);
3891 }
3892 fprintf(fp_dia ,"\n");
3893 fprintf(fp_dia,"SEG2 ");
3894 for(hv = 0;hv <
3895 current_dia->ext ; hv++)
3896 {
3897 ch = seq[n2][ j + hv ] ;
3898 fprintf(fp_dia,"%c" , ch);
3899 }
3900 fprintf(fp_dia ,"\n\n");
3901 }
3902 }
3903
3904 if( ! redundant )
3905 {
3906 thr2 = edialign_maxf2(thr,
3907 (current_dia->weight));
3908 thr = thr2 ;
3909 }
3910
3911 } /* if( wgt[k][match] > thr ) */
3912 } /* if ( bound_test ) */
3913 } /* if( open_pos ... ) */
3914 k++;
3915 } /* while( ( k <= kmaxloc ) && start_a ) */
3916 } /* if( start_a ) */
3917 } /* for(j=lb_int[i];j<=limit;j++) */
3918 new_region = 0;
3919 } /* if( open_pos ) */
3920 else
3921 if( iter_cond_prob )
3922
3923 new_region = 1;
3924
3925
3926 if(print_status)
3927 if((( seqlen[n1] + seqlen[n2] ) > 1000))
3928 if( ! ( i % 100 ) )
3929 {
3930 fp_st = fopen( pst_name ,"w");
3931
3932 fprintf(fp_st,"\n\n\n Status of the program run:\n");
3933 fprintf(fp_st," ==========================\n\n");
3934 fprintf(fp_st," %s \n\n", input_line);
3935 if( seqnum > 2 )
3936 {
3937 fprintf(fp_st," iteration step %d in" , istep );
3938 fprintf(fp_st," multiple alignment\n\n" );
3939 }
3940 if( seqnum > 2 )
3941 {
3942 fprintf(fp_st," aligning seq %d /", n1 + 1 );
3943 fprintf(fp_st," seq %d\n", n2 + 1);
3944 fprintf(fp_st," total number of");
3945 fprintf(fp_st," sequences: %u\n\n", seqnum);
3946 }
3947 fprintf(fp_st," current position in");
3948 fprintf(fp_st," sequence %d: %8d\n", n1 + 1, i);
3949 fprintf(fp_st," length of seq %d:", n1 + 1 );
3950
3951 fprintf(fp_st," %8d\n\n", seqlen[n1]);
3952
3953
3954 /*
3955 if( iter_cond_prob || ( istep == 1 ) )
3956 {
3957 if( open_pos[n1][n2][i] )
3958 {
3959 fprintf(fp_st," diff1 = %d \n", diff1 );
3960 fprintf(fp_st," diff2 = %d \n", diff2 );
3961 }
3962 else
3963 fprintf(fp_st," position already aligned");
3964 }
3965 */
3966
3967 fprintf(fp_st,"\n\n\n" );
3968
3969 fclose(fp_st);
3970 }
3971
3972
3973
3974 cp = diap[ i + 1 ];
3975 hp = NULL;
3976 accepted = 0;
3977
3978
3979 while( cp != NULL )
3980 {
3981 j = cp->b2 + cp->ext;
3982 if( (prec_vec[j])->sum < cp->sum )
3983 {
3984 prec_vec[j] = cp;
3985 accepted = 1;
3986
3987 hp = cp;
3988 cp = cp->last;
3989 }
3990 else
3991 {
3992 cp = cp->last;
3993
3994 if( accepted )
3995 {
3996 free( hp->last );
3997
3998
3999 hp->last = cp;
4000 }
4001 else
4002 {
4003 free( diap[ i + 1 ] );
4004 diap[ i + 1 ] = cp;
4005 }
4006
4007 current_nd--;
4008
4009 }
4010 }
4011
4012 for( hv=2 ; hv < ( seqlen[n2] + 3 ) ; hv++ )
4013 if( (prec_vec[hv])->sum < (prec_vec[hv-1])->sum )
4014 prec_vec[hv] = prec_vec[hv-1];
4015
4016
4017
4018 } /* for(i= ... ) */
4019 /*
4020 printf (" start_count = %d \n ", start_count );
4021 */
4022 if( pr_av_max_nd )
4023 if( istep == 1 )
4024 {
4025 if( ( nd_fp = fopen("nd_file","a")) == NULL)
4026 {
4027 printf("\n\n nd_fp could not be opened \n\n" );
4028 embExitBad();
4029 }
4030
4031 fprintf(nd_fp, " %2d/%2d %8d %8d \n",n1+1,n2+1,mnum,max_nd);
4032 fclose( nd_fp );
4033 }
4034
4035
4036 numsubseq = 0; /* counts diagonals in alignment */
4037
4038 current_dia = prec_vec[ seqlen[n2] + 1 ];
4039 cd = current_dia;
4040
4041
4042 total_sum = cd->sum;
4043
4044
4045 while( cd->prec != NULL )
4046 {
4047 numsubseq++;
4048 cd = cd->prec;
4049 }
4050
4051
4052
4053 if(numsubseq)
4054 {
4055 hv = numsubseq - 1;
4056
4057 if (
4058 (
4059 pair_dia = (struct multi_frag *)
4060 calloc( ( numsubseq + 1 ) , sizeof(struct multi_frag))) == NULL)
4061 {
4062 printf("problems with memory allocation for `pair_dia'! \n \n");
4063 embExitBad();
4064 }
4065
4066
4067 while(hv>=0)
4068 {
4069 if( dia_pa_file )
4070 {
4071 fprintf(fp_dpa, " %3d) " , ++dcount );
4072 if( seqnum > 2 )
4073 fprintf(fp_dpa, "seq: %3d %3d " , n1 + 1, n2 + 1);
4074 fprintf(fp_dpa, " beg: %6d %6d ", current_dia->b1,
4075 current_dia->b2);
4076 fprintf(fp_dpa, " len: %2d ", current_dia->ext );
4077 fprintf(fp_dpa, " weight: %5.2f ", current_dia->weight );
4078 fprintf(fp_dpa, " it: %d ", istep );
4079 if( ( wgt_type == 3 ) || crick_strand )
4080 {
4081 if( current_dia->trans )
4082 fprintf(fp_dpa," P-frg" );
4083 else
4084 fprintf(fp_dpa," N-frg" );
4085 }
4086
4087 if( current_dia->trans )
4088 if( crick_strand)
4089 {
4090 if( current_dia->cs )
4091 fprintf(fp_dpa," crick " ) ;
4092 else
4093 fprintf(fp_dpa," watson " ) ;
4094 }
4095 fprintf(fp_dpa,"\n");
4096 }
4097
4098
4099 (pair_dia[hv]).b[0] = current_dia->b1;
4100 (pair_dia[hv]).b[1] = current_dia->b2;
4101 (pair_dia[hv]).s[0] = n1;
4102 (pair_dia[hv]).s[1] = n2;
4103 (pair_dia[hv]).sel = 1;
4104 (pair_dia[hv]).ext = current_dia->ext;
4105 (pair_dia[hv]).weight = current_dia->weight;
4106 (pair_dia[hv]).ow = current_dia->weight;
4107 (pair_dia[hv]).trans = current_dia->trans ;
4108 if( crick_strand )
4109 {
4110 (pair_dia[hv]).cs = current_dia->cs ;
4111 }
4112 (pair_dia[hv]).it = istep;
4113 hv--;
4114 current_dia = current_dia->prec;
4115 }
4116 /* if( dia_pa_file )
4117 fprintf(fp_dpa, " \n" );
4118 */
4119
4120
4121 /* modified in LGI-VITRY
4122 if( iter_cond_prob )
4123 */
4124
4125 cont_it = 1 ;
4126
4127 } /* if(numsubseq) */
4128
4129
4130
4131 *number = numsubseq;
4132
4133
4134 if( long_output )
4135 {
4136 printf("Seq. %3d -%3d: ", n1+1, n2+1);
4137 printf("T = %2.2f,", threshold);
4138 printf(" %3u D. in alignment,", *number);
4139 printf("%6d D. in matrix", mnum);
4140 printf("\n");
4141 }
4142
4143
4144
4145 for( hv=0 ; hv < seqlen[n1]+3 ; hv++ )
4146 {
4147 current_dia = diap[hv];
4148
4149 while( current_dia != NULL )
4150 {
4151 hp = current_dia;
4152 current_dia = current_dia->last;
4153 free(hp);
4154 }
4155 }
4156
4157
4158 if( istep == 1 )
4159 {
4160 max_dia_num = max_dia_num + max_nd ;
4161 }
4162
4163 free(diap);
4164
4165 free(ub_int);
4166 free(lb_int);
4167 free(prec_vec);
4168 if( dia_pa_file )
4169 {
4170 fclose( fp_dpa );
4171 }
4172
4173 return( total_sum);
4174 }
4175
4176
4177
4178
#if 0
/* @funcstatic edialign_num_test ******************************************
**
** Disabled code (compiled out by the enclosing "#if 0").
**
** Tests whether a string is made up solely of decimal digits and '.'
** characters. The whole string is always scanned; there is no early exit
** after the first offending character. An empty string yields 1.
**
** @param [u] cp [char*] NUL-terminated string to inspect (only read here)
** @return [ajint] 1 if every character is a digit or '.', otherwise 0
*****************************************************************************/

static ajint edialign_num_test( char *cp )
{
    ajint result = 1;    /* stays 1 until a disallowed character is seen */
    ajint i;
    char *strng;
    ajint len;

    strng = cp;

    len = strlen(strng);

    for(i = 0 ; i < len ; i++ )
        if( ! isdigit(strng[i]) && ( strng[i] != '.' ) )
        {
            result = 0;
            /* printf("\n %c is no digit !!!\n", strng[i]); */
        }

    return result ;
}
#endif
4209
4210
4211
4212
/* @funcstatic edialign_minf2 ******************************************
**
** Returns the smaller of two floats. When "a < b" does not hold
** (including equal values) the second argument is returned.
**
** @param [r] a [float] First value
** @param [r] b [float] Second value
** @return [float] a if a < b, otherwise b
*****************************************************************************/

static float edialign_minf2(float a, float b)
{
    return (a < b) ? a : b;
}
4229
4230
4231
4232
/* @funcstatic edialign_maxf2 ******************************************
**
** Returns the larger of two floats. When "a > b" does not hold
** (including equal values) the second argument is returned.
**
** @param [r] a [float] First value
** @param [r] b [float] Second value
** @return [float] a if a > b, otherwise b
*****************************************************************************/

static float edialign_maxf2(float a, float b)
{
    return (a > b) ? a : b;
}
4249
4250
4251
4252
4253 /* @funcstatic edialign_mini2 ******************************************
4254 **
4255 ** edialign_mini2
4256 **
4257 ** @param [r] a [ajint] Undocumented
4258 ** @param [r] b [ajint] Undocumented
4259 ** @return [ajint] Undocumented
4260 *****************************************************************************/
4261
edialign_mini2(ajint a,ajint b)4262 static ajint edialign_mini2(ajint a, ajint b)
4263 {
4264 if(a<b)
4265 return a;
4266
4267 return b;
4268 }
4269
4270
4271
4272
4273 /* @funcstatic edialign_minu2 ******************************************
4274 **
4275 ** edialign_mini2
4276 **
4277 ** @param [r] a [ajint] Undocumented
4278 ** @param [r] b [ajint] Undocumented
4279 ** @return [ajuint] Undocumented
4280 *****************************************************************************/
4281
edialign_minu2(ajint a,ajint b)4282 static ajuint edialign_minu2(ajint a, ajint b)
4283 {
4284 ajuint ia;
4285 ajuint ib;
4286
4287 if(a<0)
4288 ia = 0;
4289 else
4290 ia = a;
4291
4292 if(b<0)
4293 ib = 0;
4294 else
4295 ib = b;
4296
4297 if(a<b)
4298 return ia;
4299
4300 return ib;
4301 }
4302
4303
4304
4305
4306 /* @funcstatic edialign_maxi2 ******************************************
4307 **
4308 ** edialign_maxi2
4309 **
4310 ** @param [r] a [ajint] Undocumented
4311 ** @param [r] b [ajint] Undocumented
4312 ** @return [ajint] Undocumented
4313 *****************************************************************************/
4314
edialign_maxi2(ajint a,ajint b)4315 static ajint edialign_maxi2(ajint a, ajint b)
4316 {
4317 if (a>b)
4318 return a;
4319
4320 return b;
4321 }
4322
4323
4324
4325
4326 /* @funcstatic edialign_mini3 ************************************************
4327 **
4328 ** edialign_mini3
4329 **
4330 ** @param [r] a [ajint] Undocumented
4331 ** @param [r] b [ajint] Undocumented
4332 ** @param [r] c [ajint] Undocumented
4333 ** @return [ajint] Undocumented
4334 *****************************************************************************/
4335
edialign_mini3(ajint a,ajint b,ajint c)4336 static ajint edialign_mini3(ajint a, ajint b, ajint c)
4337 {
4338 return edialign_mini2(a, edialign_mini2(b,c));
4339 }
4340
4341
4342
4343
#if 0
/* @funcstatic edialign_minf ************************************************
**
** Disabled code (compiled out by the enclosing "#if 0").
**
** In-place minimum: lowers the float pointed to by a to b when the
** stored value is greater than b; otherwise leaves it untouched.
**
** @param [u] a [float*] Value to be lowered if necessary
** @param [r] b [float] Upper limit to apply
** @return [void]
*****************************************************************************/

static void edialign_minf(float *a, float b)
{
    if (*a > b)
        *a = b;

    return;
}
#endif
4362
4363
4364
4365
4366 /* @funcstatic edialign_mini ************************************************
4367 **
4368 ** edialign_mini
4369 **
4370 ** @param [u] a [ajint*] Undocumented
4371 ** @param [r] b [ajint] Undocumented
4372 ** @return [void]
4373 *****************************************************************************/
4374
edialign_mini(ajint * a,ajint b)4375 static void edialign_mini(ajint *a, ajint b)
4376 {
4377 if (*a > b)
4378 *a = b;
4379
4380 return;
4381 }
4382
4383
4384
4385
#if 0
/* @funcstatic edialign_maxf ************************************************
**
** Disabled code (compiled out by the enclosing "#if 0"). Unused.
**
** In-place maximum: raises the float pointed to by a to b when the
** stored value is less than b; otherwise leaves it untouched.
**
** @param [u] a [float*] Value to be raised if necessary
** @param [r] b [float] Lower limit to apply
** @return [void]
*****************************************************************************/

static void edialign_maxf(float *a, float b)
{
    if (*a < b)
        *a = b;

    return;
}
#endif
4404
4405
4406
4407
4408 /* @funcstatic edialign_maxi ************************************************
4409 **
4410 ** edialign_maxi
4411 **
4412 ** @param [u] a [ajint*] Undocumented
4413 ** @param [r] b [ajint] Undocumented
4414 ** @return [void]
4415 *****************************************************************************/
4416
edialign_maxi(ajint * a,ajint b)4417 static void edialign_maxi(ajint *a, ajint b)
4418 {
4419 if (*a < b)
4420 *a = b;
4421
4422 return;
4423 }
4424
4425
4426
4427
4428 /* @funcstatic edialign_maxu ************************************************
4429 **
4430 ** edialign_maxu
4431 **
4432 ** @param [u] a [ajuint*] Undocumented
4433 ** @param [r] b [ajuint] Undocumented
4434 ** @return [void]
4435 *****************************************************************************/
4436
edialign_maxu(ajuint * a,ajuint b)4437 static void edialign_maxu(ajuint *a, ajuint b)
4438 {
4439 if (*a < b)
4440 *a = b;
4441
4442 return;
4443 }
4444
4445
4446
4447
/* @funcstatic edialign_invert **********************************************
**
** Maps an upper-case nucleotide letter to its partner: 'A' <-> 'T' and
** 'C' <-> 'G'. Any other character is returned unchanged.
**
** @param [r] c1 [char] Character to convert
** @return [char] Partner base, or c1 itself if it is not A, C, G or T
*****************************************************************************/

static char edialign_invert ( char c1 )
{
    switch(c1)
    {
        case 'T':
            return 'A';
        case 'A':
            return 'T';
        case 'C':
            return 'G';
        case 'G':
            return 'C';
        default:
            break;
    }

    return c1;
}
4471
4472
4473
4474
4475 /* @funcstatic edialign_translate *******************************************
4476 **
4477 ** edialign_translate
4478 **
4479 ** @param [r] c1 [char] Undocumented
4480 ** @param [r] c2 [char] Undocumented
4481 ** @param [r] c3 [char] Undocumented
4482 ** @param [r] seqno [ajint] Undocumented
4483 ** @param [r] pos [ajint] Undocumented
4484 ** @return [ajint] Undocumented
4485 *****************************************************************************/
4486
edialign_translate(char c1,char c2,char c3,ajint seqno,ajint pos)4487 static ajint edialign_translate(char c1, char c2 ,char c3, ajint seqno,
4488 ajint pos)
4489 {
4490 /* translation of triplets into amino acids */
4491
4492
4493 ajint amac = 0; /* resulting amino acid */
4494
4495 (void) seqno; /* make it used */
4496 (void) pos; /* make it used */
4497
4498 if(c1 == 'T')
4499 {
4500 if(c2 == 'T')
4501 {
4502 if(c3 == 'T') amac = 18;
4503 if(c3 == 'C') amac = 18;
4504 if(c3 == 'A') amac = 16;
4505 if(c3 == 'G') amac = 16;
4506 }
4507 if(c2 == 'C') amac = 2;
4508 if(c2 == 'A')
4509 {
4510 if(c3 == 'T') amac = 19;
4511 if(c3 == 'C') amac = 19;
4512 if(c3 == 'A') amac = 0; /* stop codon */
4513 if(c3 == 'G') amac = 0;
4514 }
4515 if(c2 == 'G')
4516 {
4517 if(c3 == 'T') amac = 1;
4518 if(c3 == 'C') amac = 1;
4519 if(c3 == 'A') amac = 20;
4520
4521 if(c3 == 'G') amac = 20;
4522 }
4523 }
4524
4525 if(c1 == 'C')
4526 {
4527 if(c2 == 'T') amac = 16;
4528 if(c2 == 'C') amac = 4;
4529 if(c2 == 'A')
4530 {
4531 if(c3 == 'T') amac = 11;
4532 if(c3 == 'C') amac = 11;
4533 if(c3 == 'A') amac = 10;
4534 if(c3 == 'G') amac = 10;
4535 }
4536 if(c2 == 'G') amac = 12;
4537 }
4538
4539 if(c1 == 'A')
4540 {
4541 if(c2 == 'T')
4542 {
4543 if(c3 == 'T') amac = 15;
4544 if(c3 == 'C') amac = 15;
4545 if(c3 == 'A') amac = 15;
4546 if(c3 == 'G') amac = 14;
4547 }
4548 if(c2 == 'C') amac = 3;
4549 if(c2 == 'A')
4550 {
4551 if(c3 == 'T') amac = 7;
4552 if(c3 == 'C') amac = 7;
4553 if(c3 == 'A') amac = 13;
4554 if(c3 == 'G') amac = 13;
4555 }
4556 if(c2 == 'G')
4557 {
4558 if(c3 == 'T') amac = 2;
4559 if(c3 == 'C') amac = 2;
4560 if(c3 == 'A') amac = 12;
4561 if(c3 == 'G') amac = 12;
4562 }
4563 }
4564
4565 if(c1 == 'G')
4566 {
4567 if(c2 == 'T') amac = 17;
4568 if(c2 == 'C') amac = 5;
4569 if(c2 == 'A')
4570 {
4571 if(c3 == 'T') amac = 8;
4572 if(c3 == 'C') amac = 8;
4573 if(c3 == 'A') amac = 9;
4574 if(c3 == 'G') amac = 9;
4575 }
4576 if(c2 == 'G') amac = 6;
4577 }
4578
4579
4580 if(
4581 ( c1 != 'A' && c1 != 'T' && c1 != 'G' && c1 != 'C' ) ||
4582 ( c2 != 'A' && c2 != 'T' && c2 != 'G' && c2 != 'C' ) ||
4583 ( c3 != 'A' && c3 != 'T' && c3 != 'G' && c3 != 'C' )
4584 )
4585 return( 0 );
4586
4587 return( amac );
4588 } /* translate */
4589
4590
4591
4592
#if 0
/* @funcstatic edialign_int_test ********************************************
**
** Disabled code (compiled out by the enclosing "#if 0"). Unused.
**
** Reports whether a float survives a round trip through ajint: the value
** is converted to ajint and the result compared with the original float.
**
** @param [r] f [float] Value to test
** @return [ajint] 1 if the converted integer compares equal to f,
**                 otherwise 0
*****************************************************************************/

static ajint edialign_int_test(float f)
{
    ajint i = f;    /* implicit float -> integer conversion */

    if(i == f)
        return (1);

    return (0);
}
#endif
4612
4613
4614
4615
4616 /* @funcstatic edialign_change ***********************************************
4617 **
4618 ** edialign_change
4619 **
4620 ** @param [u] a [struct multi_frag*] Undocumented
4621 ** @param [u] b [struct multi_frag*] Undocumented
4622 ** @return [void]
4623 *****************************************************************************/
4624
edialign_change(struct multi_frag * a,struct multi_frag * b)4625 static void edialign_change(struct multi_frag *a, struct multi_frag *b)
4626 {
4627 struct multi_frag c;
4628 struct multi_frag *an;
4629 struct multi_frag *bn;
4630
4631 c = *a;
4632 an = a->next;
4633 bn = b->next;
4634
4635 *a = *b;
4636 *b = c;
4637
4638 a->next = an;
4639 b->next = bn;
4640
4641 return;
4642 }
4643
4644
4645
4646
#if 0
/* @funcstatic edialign_pair_change *****************************************
**
** Disabled code (compiled out by the enclosing "#if 0"). Unused.
**
** Exchanges the complete contents of two seq_pair structures by
** whole-struct assignment through a temporary copy.
**
** @param [u] a [struct seq_pair*] First structure
** @param [u] b [struct seq_pair*] Second structure
** @return [void]
*****************************************************************************/

static void edialign_pair_change(struct seq_pair *a, struct seq_pair *b)
{
    struct seq_pair c;    /* temporary holding the original *a */

    c = *a;
    *a = *b;
    *b = c;

    return;
}
#endif
4668
4669
4670
4671
4672 /* @funcstatic edialign_ow_bubble_sort **************************************
4673 **
4674 ** edialign_ow_bubble_sort
4675 **
4676 ** @param [r] number [int] Undocumented
4677 ** @param [u] dp [struct multi_frag*] Undocumented
4678 ** @return [void]
4679 *****************************************************************************/
4680
edialign_ow_bubble_sort(int number,struct multi_frag * dp)4681 static void edialign_ow_bubble_sort( int number , struct multi_frag *dp )
4682 {
4683 /* sorting diagonals in multiple alignment according to their
4684 overlap weights */
4685
4686 struct multi_frag *hp;
4687 ajint hv1;
4688 ajint hv2;
4689
4690 FILE *fp_st;
4691
4692 for( hv1 = 1 ; hv1 < number ; hv1++ )
4693 {
4694 hp = dp;
4695
4696 if( print_status )
4697 if( ( hv1 % 100 ) == 0 )
4698 {
4699 fp_st = fopen( pst_name ,"w");
4700
4701 fprintf(fp_st,"\n\n\n Status of the program run:\n");
4702 fprintf(fp_st," ==========================\n\n");
4703 fprintf(fp_st," %s \n\n", input_line);
4704 fprintf(fp_st," iteration step %d in multiple "
4705 "alignment\n", istep );
4706 fprintf(fp_st," overlap weight sorting of diagonals\n");
4707 fprintf(fp_st," current diagonal = %d\n\n", hv1 );
4708 fprintf(fp_st," total number of");
4709 fprintf(fp_st," diagonals: %d\n\n\n\n", number);
4710 fclose(fp_st);
4711 }
4712
4713
4714 for( hv2 = hv1 ; hv2 < number ; hv2++ )
4715 {
4716 if( hp->ow < (hp->next)->ow )
4717 edialign_change( hp , hp->next );
4718 hp = hp->next;
4719 }
4720 }
4721
4722 return;
4723 } /* ow_bubble_sort */
4724
4725
4726
4727
4728 /* @funcstatic edialign_bubble_sort **************************************
4729 **
4730 ** edialign_bubble_sort
4731 **
4732 ** @param [r] number [ajint] Undocumented
4733 ** @param [u] dp [struct multi_frag*] Undocumented
4734 ** @return [void]
4735 *****************************************************************************/
4736
edialign_bubble_sort(ajint number,struct multi_frag * dp)4737 static void edialign_bubble_sort(ajint number , struct multi_frag *dp )
4738 {
4739 /* sorting diagonals in multiple alignment according to their
4740 individual weights */
4741
4742 struct multi_frag *hp;
4743 int hv1, hv2;
4744 FILE *fp_st;
4745
4746 for( hv1 = 1 ; hv1 < number ; hv1++ )
4747 {
4748 hp = dp;
4749
4750 if( print_status )
4751 if( ( hv1 % 100 ) == 0 )
4752 {
4753 fp_st = fopen( pst_name ,"w");
4754
4755 fprintf(fp_st,"\n\n\n Status of the program run:\n");
4756 fprintf(fp_st," ==========================\n\n");
4757 fprintf(fp_st," %s \n\n", input_line);
4758 fprintf(fp_st," iteration step %d\n", istep );
4759 fprintf(fp_st," ind. weight sorting of diagonals\n");
4760 fprintf(fp_st," current diagonal = %d\n\n", hv1 );
4761 fprintf(fp_st," total number of");
4762 fprintf(fp_st," diagonals: %d\n\n\n\n", number);
4763 fclose(fp_st);
4764 }
4765
4766
4767 for( hv2 = hv1 ; hv2 < number ; hv2++ )
4768 {
4769 if( hp->weight < (hp->next)->weight )
4770 edialign_change( hp , hp->next );
4771 hp = hp->next;
4772 }
4773 }
4774
4775 return;
4776 } /* bubble_sort */
4777
4778
4779
4780
4781 /* @funcstatic edialign_change_struct_el ************************************
4782 **
4783 ** edialign_change_struct_el
4784 **
4785 ** @param [u] a [struct multi_frag**] Undocumented
4786 ** @param [r] l [ajint] Undocumented
4787 ** @param [r] r [ajint] Undocumented
4788 ** @return [void]
4789 *****************************************************************************/
4790
edialign_change_struct_el(struct multi_frag ** a,ajint l,ajint r)4791 static void edialign_change_struct_el(struct multi_frag **a, ajint l, ajint r)
4792 {
4793 struct multi_frag *dummy;
4794
4795 dummy = a[l];
4796 a[l] = a[r];
4797 a[r] = dummy;
4798
4799 return;
4800 }
4801
4802
4803
4804
4805 /* @funcstatic edialign_change_first ***************************************
4806 **
4807 ** edialign_change_first
4808 **
4809 ** @param [u] a [struct multi_frag*] Undocumented
4810 ** @param [u] b [struct multi_frag*] Undocumented
4811 ** @return [void]
4812 *****************************************************************************/
4813
edialign_change_first(struct multi_frag * a,struct multi_frag * b)4814 static void edialign_change_first(struct multi_frag *a, struct multi_frag *b)
4815 {
4816 struct multi_frag c;
4817 struct multi_frag *an;
4818 struct multi_frag *bn;
4819
4820 if(a==b)
4821 {
4822 /* Change the first list-element with the second one
4823 ** (old first-el.).
4824 */
4825 c = *a;
4826 an = a->next;
4827 bn = a->next->next;
4828
4829 *a = *(a->next);
4830 a->next = bn;
4831
4832 *an = c;
4833 an->next = a;
4834 }
4835 else /* Change the new first list-el. with the old first-el. */
4836 {
4837 /* Make a copy of the new first-listelement a. */
4838 c = *a;
4839 /* Make a copy of the pointer at the second-el. */
4840 an = a->next;
4841 /* Make a copy of the pointer old first-el. shows at. */
4842 bn = b->next->next;
4843
4844 /* Write the value of the old first-el. on the place of the
4845 ** new first-el.
4846 */
4847 *a = *(b->next);
4848
4849 /* Bend his "next" pointer at the next el. of the old first-el. */
4850 a->next = bn;
4851
4852 /* Write the value of the new fist-el. on the place of the
4853 ** old first-el.
4854 */
4855 *(b->next) = c;
4856
4857 /* Bend his "next" pointer at the next el. of the new first-el. */
4858 b->next->next = an;
4859
4860 b->next = a;
4861 }
4862
4863 return;
4864 }
4865
4866
4867
4868
4869 /* @funcstatic edialign_quicksort_ow ***************************************
4870 **
4871 ** edialign_quicksort_ow
4872 **
4873 ** @param [u] array [struct multi_frag**] Undocumented
4874 ** @param [r] left [ajint] Undocumented
4875 ** @param [r] right [ajint] Undocumented
4876 ** @return [void]
4877 *****************************************************************************/
4878
edialign_quicksort_ow(struct multi_frag ** array,ajint left,ajint right)4879 static void edialign_quicksort_ow(struct multi_frag **array, ajint left,
4880 ajint right)
4881 {
4882 ajint l = left;
4883 ajint r = right;
4884 struct multi_frag *element;
4885
4886 element = array[(left+right)/2];
4887
4888 do
4889 {
4890 while(array[l]->ow > element->ow)
4891 l++;
4892 while(element->ow > array[r]->ow)
4893 r--;
4894
4895 if(l < r) edialign_change_struct_el(array,l,r);
4896 if(l <= r) {l++; r--;}
4897 }while(l<=r);
4898
4899 if(left < r)
4900 edialign_quicksort_ow(array, left, r);
4901 if(l < right)
4902 edialign_quicksort_ow(array, l, right);
4903
4904
4905 return;
4906 } /*edialign_quicksort_ow*/
4907
4908
4909
4910
4911 /* @funcstatic edialign_quicksort_weight ************************************
4912 **
4913 ** edialign_quicksort_weight
4914 **
4915 ** @param [u] array [struct multi_frag**] Undocumented
4916 ** @param [r] left [ajint] Undocumented
4917 ** @param [r] right [ajint] Undocumented
4918 ** @return [void]
4919 *****************************************************************************/
4920
edialign_quicksort_weight(struct multi_frag ** array,ajint left,ajint right)4921 static void edialign_quicksort_weight(struct multi_frag **array, ajint left,
4922 ajint right)
4923 {
4924 ajint l = left;
4925 ajint r = right;
4926 struct multi_frag *element;
4927
4928 element = array[(left+right)/2];
4929
4930 do
4931 {
4932 while(array[l]->weight > element->weight) l++;
4933 while(element->weight > array[r]->weight) r--;
4934
4935 if(l < r) edialign_change_struct_el(array,l,r);
4936 if(l <= r) {l++; r--;}
4937 }while(l<=r);
4938
4939 if(left < r)
4940 edialign_quicksort_weight(array, left, r);
4941 if(l < right)
4942 edialign_quicksort_weight(array, l, right);
4943
4944
4945 return;
4946 }
4947
4948
4949
4950
4951 /* @funcstatic edialign_assemble_list ***************************************
4952 **
4953 ** edialign_assemble_list
4954 **
4955 ** @param [u] array [struct multi_frag**] Undocumented
4956 ** @param [u] dp [struct multi_frag*] Undocumented
4957 ** @param [r] number [ajint] Undocumented
4958 ** @return [void]
4959 *****************************************************************************/
4960
edialign_assemble_list(struct multi_frag ** array,struct multi_frag * dp,ajint number)4961 static void edialign_assemble_list(struct multi_frag **array,
4962 struct multi_frag *dp, ajint number)
4963 {
4964 ajint i;
4965 ajint idx = 0;
4966
4967 for(i = 0; i< number-1; i++)
4968 {
4969 if(dp==array[i])
4970 idx = i;
4971 array[i]->next = array[i+1];
4972 }
4973
4974 array[number-1]->next = 0;
4975 if(dp==array[number-1])
4976 idx = number-1;
4977
4978 if(idx!=0)
4979 edialign_change_first(array[0],array[idx-1]);
4980
4981 return;
4982 }
4983
4984
4985
4986
4987 /* @funcstatic edialign_frag_sort ***************************************
4988 **
4989 ** edialign_frag_sort
4990 **
4991 ** @param [r] number [ajint] Undocumented
4992 ** @param [u] dp [struct multi_frag*] Undocumented
4993 ** @param [r] olw [ajint] Undocumented
4994 ** @return [void]
4995 *****************************************************************************/
4996
edialign_frag_sort(ajint number,struct multi_frag * dp,ajint olw)4997 static void edialign_frag_sort(ajint number , struct multi_frag *dp ,
4998 ajint olw )
4999 {
5000 ajint i=1;
5001
5002 struct multi_frag **array = NULL;
5003 if((array = (struct multi_frag**)calloc(number+1,
5004 sizeof(struct multi_frag*)))==0)
5005 {
5006 ajFatal(" problems with memory allocation for `all_clades'\n \n");
5007 }
5008
5009 array[0] = dp;
5010 while(array[i-1]->next)
5011 {array[i] = array[i-1]->next; i++;}
5012
5013 if( olw )
5014 edialign_quicksort_ow(array,0,number);
5015 else
5016 edialign_quicksort_weight(array,0,number);
5017
5018
5019 edialign_assemble_list(array, dp, number+1);
5020
5021 AJFREE(array);
5022 return;
5023 }
5024
5025
5026
5027
5028 /* @funcstatic edialign_ow_add ***************************************
5029 **
5030 ** edialign_ow_add
5031 **
5032 ** @param [u] sm1 [struct multi_frag*] Undocumented
5033 ** @param [u] sm2 [struct multi_frag*] Undocumented
5034 ** @return [void]
5035 *****************************************************************************/
5036
edialign_ow_add(struct multi_frag * sm1,struct multi_frag * sm2)5037 static void edialign_ow_add( struct multi_frag *sm1 , struct multi_frag *sm2 )
5038 {
5039 /* increasing the overlap weights of two diagonals, if they
5040 have any overlap */
5041
5042 ajint trans;
5043 ajint i;
5044 ajint j;
5045 ajint k;
5046 ajint s1;
5047 ajint s2;
5048 ajint b1;
5049 ajint b2;
5050 ajint conslen;
5051 ajint dif;
5052 ajint match;
5053 float add_wgt;
5054
5055 trans = sm1->trans;
5056
5057 for(i=0;i<2;i++)
5058 for(j=0;j<2;j++)
5059 if( sm1->s[i] == sm2->s[j] )
5060 if( sm1->s[j] != sm2->s[i] )
5061 if( sm1->b[i] < sm2->b[j] + sm2->ext &&
5062 sm2->b[j] < sm1->b[i] + sm1->ext )
5063 {
5064 conslen = edialign_mini2( sm1->b[i] + sm1->ext,
5065 sm2->b[j] +
5066 sm2->ext)
5067 - edialign_maxi2( sm1->b[i] , sm2->b[j] );
5068 if(
5069 ( trans == 0 ) ||
5070 ( ( conslen % 3 ) == 0 )
5071 ) {
5072
5073 s1 = sm1->s[(i+1)%2];
5074 s2 = sm2->s[(j+1)%2];
5075
5076 b1 = sm1->b[(i+1)%2];
5077 dif = sm2->b[j] - sm1->b[i];
5078 if (dif > 0)
5079 b1 = b1 + dif;
5080
5081 b2 = sm2->b[(j+1)%2];
5082 dif = sm1->b[i] - sm2->b[j];
5083 if (dif > 0)
5084 b2 = b2 + dif;
5085
5086 match = 0;
5087
5088 for( k = 0 ; k < conslen ; k++ )
5089 {
5090 if(
5091 ( wgt_type == 0 ) ||
5092 ( trans && ( ( k % 3 ) == 0 ) )
5093 )
5094 match = match
5095 + sim_score[amino[s1][b1+k]][amino[s2]
5096 [b2+k]];
5097 else
5098 match = match + ( seq[ s1 ][ b1 + k ] ==
5099 seq[ s2 ][ b2 + k ] );
5100 }
5101
5102
5103 if( wgt_type == 0 )
5104 add_wgt = wgt_prot[ conslen ][ match ];
5105 else
5106 if( trans )
5107 add_wgt = wgt_trans[ conslen / 3 ][match];
5108 else
5109 add_wgt = wgt_dna[ conslen ][ match ] ;
5110
5111 sm1->ow = sm1->ow + add_wgt ;
5112 sm2->ow = sm2->ow + add_wgt ;
5113
5114 }
5115 }
5116
5117 return;
5118 }
5119
5120
5121
5122
5123 /* @funcstatic edialign_seq_shift ********************************************
5124 **
5125 ** edialign_seq_shift
5126 **
5127 ** @return [void]
5128 *****************************************************************************/
5129
edialign_seq_shift(void)5130 static void edialign_seq_shift(void)
5131 {
5132 ajuint i;
5133 ajint hv;
5134
5135 for(i = 0 ; i < seqnum ; i++)
5136 for(hv = seqlen[i]+1 ; hv > 0 ; hv--)
5137 seq[i][hv] = seq[i][hv-1];
5138
5139 return;
5140 }
5141
5142
5143
5144
5145 /* @funcstatic edialign_filter ********************************************
5146 **
5147 ** edialign_filter
5148 **
5149 ** @param [u] number [ajint*] Undocumented
5150 ** @param [u] diagonal [struct multi_frag*] Undocumented
5151 ** @return [void]
5152 *****************************************************************************/
5153
edialign_filter(ajint * number,struct multi_frag * diagonal)5154 static void edialign_filter(ajint *number, struct multi_frag *diagonal)
5155 {
5156 /* checks diagonals one by one, if they are consistent with the
5157 diagonals already included into the alignment. If a new diagonal
5158 is consistent, it is included into the alignment and the frontiers
5159 in clos (when GABIOS is used) are changed accordingly */
5160
5161 ajuint i;
5162 ajuint j;
5163 ajint hv;
5164 ajint ab[2];
5165 ajint as[2];
5166 ajint aext;
5167 ajint nv;
5168 float awgt ;
5169
5170 ajint test; /* = 1 if current diagonal consistent; = 0 otherwise */
5171 ajint number_bf; /* number of diagonals before filter */
5172
5173 FILE *fp_st;
5174 FILE *fp_cap = NULL;
5175
5176 struct multi_frag *dia;
5177 char cap_file_name[ NAME_LEN ] ;
5178
5179 if( ( istep > 0 ) && ( iter_cond_prob == 0 ) )
5180 for( i = 0 ; i < seqnum ; i++ )
5181 for( j = 0 ; j < seqnum ; j++ )
5182 cont_it_p[i][j] = 0 ;
5183
5184 dia = diagonal;
5185 number_bf = *number;
5186
5187 if( ( istep == 0 ) && anchors && ( seqnum > 2 ) )
5188 {
5189 strcpy( cap_file_name , input_name );
5190 strcat( cap_file_name , ".cap" );
5191 fp_cap = fopen( cap_file_name ,"w");
5192 }
5193
5194
5195 for(nv = 0 ; nv < number_bf ; nv++ )
5196 {
5197 ab[0] = dia->b[0]; /* begin of n-th diagonal in 1. sequence */
5198 ab[1] = dia->b[1]; /* begin of n-th diagonal in 2. sequence */
5199 as[0] = dia->s[0]; /* 1. sequence of n-th diagonal */
5200 as[1] = dia->s[1]; /* 2. sequence of n-th diagonal */
5201 aext = dia->ext; /* length of n-th diagonal */
5202 awgt = dia->weight; /* length of n-th diagonal */
5203 /*ae[0] = ab[0] + aext - 1;*/ /* end of n-th diagonal in 1. sequence */
5204 /*ae[1] = ab[1]+aext-1;*/ /* end of n-th diagonal in 2. sequence */
5205
5206
5207 if( print_status )
5208 if( ( ( nv + 1 ) % 10 ) == 0 )
5209 {
5210 fp_st = fopen( pst_name ,"w");
5211
5212 fprintf(fp_st,"\n\n\n Status of the program run:\n");
5213 fprintf(fp_st," ==========================\n\n");
5214 fprintf(fp_st," %s \n\n", input_line);
5215 fprintf(fp_st," iteration step %d \n", istep );
5216 fprintf(fp_st," checking diagonal %d for ", nv + 1);
5217 fprintf(fp_st,"consistency\n\n total number of");
5218 fprintf(fp_st," diagonals = %d \n\n\n\n", number_bf);
5219 fclose(fp_st);
5220 }
5221
5222 test = edialign_alignableSegments(gabiosclos,
5223 as[0], ab[0], as[1], ab[1],
5224 aext);
5225
5226 if(test) /* i.e current diagonal consistent with the diagonals
5227 already included into the alignment */
5228 {
5229
5230 edialign_addAlignedSegments(gabiosclos, as[0], ab[0], as[1], ab[1],
5231 aext);
5232
5233 if( istep )
5234 for(hv=0;hv<aext;hv++)
5235 for(i=0;i<2;i++)
5236 {
5237 j = (i+1)%2;
5238 open_pos[ as[i] ][ as[j] ][ ab[i]+hv ] = 0;
5239 }
5240
5241 dia->sel = 1;
5242 glob_sim[ as[0] ][ as[1] ] =
5243 glob_sim[ as[0] ][ as[1] ] + dia->weight;
5244
5245 if( istep )
5246 tot_weight = tot_weight + dia->weight;
5247
5248
5249 } /* if test, i.e. current diagonal consistent */
5250 else /* no consistency */
5251 {
5252 (*number)--;
5253 dia->sel = 0;
5254 cont_it_p[ as[0] ][ as[1] ] = 1 ;
5255 }
5256
5257 if( ( istep == 0 ) && anchors && ( seqnum > 2 ) )
5258 {
5259 fprintf( fp_cap, " anchor %d %d %d %d %d %f " , as[0] + 1,
5260 as[1] + 1 , ab[0], ab[1], aext , awgt);
5261 if( dia->sel == 0 )
5262 fprintf( fp_cap , " inconsistent ");
5263 fprintf( fp_cap , "\n");
5264 }
5265
5266
5267 dia = dia->next;
5268
5269 } /* for(hv = 0 ; hv < number_bf ; hv++ ) */
5270
5271 if( ( istep == 0 ) && anchors && ( seqnum > 2 ) )
5272 fclose( fp_cap ) ;
5273
5274 return;
5275 }
5276
5277
5278
5279
5280 /* @funcstatic edialign_sel_test ********************************************
5281 **
5282 ** edialign_sel_test
5283 **
5284 ** @return [void]
5285 *****************************************************************************/
5286
edialign_sel_test(void)5287 static void edialign_sel_test(void)
5288 {
5289 ajint hv;
5290 struct multi_frag *hp;
5291
5292 hp = this_it_dia;
5293
5294 for( hv = 0 ; hv < num_dia_af[ istep ] ; hv++ )
5295 {
5296 if( hp->sel == 0 )
5297 {
5298 ajFatal("\n\n\n sel[%d] = %d \n", hv, hp->sel);
5299 }
5300 hp = hp->next;
5301 }
5302
5303 return;
5304 }
5305
5306
5307
5308
5309 /* @funcstatic edialign_throw_out ********************************************
5310 **
5311 ** edialign_throw_out
5312 **
5313 ** @param [u] weight_sum [float*] Undocumented
5314 ** @return [void]
5315 *****************************************************************************/
5316
edialign_throw_out(float * weight_sum)5317 static void edialign_throw_out( float *weight_sum )
5318 {
5319 ajint nc;
5320 short consist_found = 0;
5321
5322 struct multi_frag *cp = NULL; /* current diagonal */
5323 struct multi_frag *hp = NULL; /* predecedor of cp */
5324
5325 /* hp = ( struct multi_frag *) calloc( 1 , sizeof( struct multi_frag ) ); */
5326 cp = this_it_dia;
5327 hp = NULL;
5328 *weight_sum = 0;
5329
5330
5331 for( nc = 0 ; nc < num_dia_bf[ istep ] ; nc++ )
5332 {
5333 if( cp->sel )
5334 {
5335 *weight_sum = *weight_sum + cp->weight;
5336 consist_found = 1;
5337
5338 hp = cp;
5339 cp = cp->next;
5340 }
5341 else
5342 {
5343 cp = cp->next;
5344 if( consist_found )
5345 {
5346 free(hp->next);
5347 hp->next = cp;
5348 }
5349 else
5350 {
5351 free( this_it_dia);
5352 this_it_dia = cp;
5353 }
5354 }
5355 }
5356
5357 return;
5358 }
5359
5360
5361
5362
5363 /* @funcstatic edialign_new_shift ********************************************
5364 **
5365 ** shifts the elements of sequence s starting with position p
5366 ** for dif elements to the right
5367 **
5368 ** @param [r] s [ajint] Undocumented
5369 ** @param [r] p [ajint] Undocumented
5370 ** @param [r] dif [ajint] Undocumented
5371 ** @return [void]
5372 *****************************************************************************/
5373
edialign_new_shift(ajint s,ajint p,ajint dif)5374 static void edialign_new_shift(ajint s, ajint p, ajint dif)
5375 {
5376 ajint hv;
5377 ajint shift_dif; /* length of a gap (if existing) between position hv
5378 and position hv+1. In case of gaps, the function
5379 `new_shift' diminishs the lengths of the gaps instead
5380 of shifting further sequence elements to the right */
5381
5382 for(hv=p ; ( hv<seqlen[s]+1 ) && (dif>0) ; hv++)
5383 {
5384 shift_dif = shift[s][hv+1] - shift[s][hv] - 1;
5385 shift[s][hv] = shift[s][hv] + dif;
5386 dif = dif - shift_dif;
5387 }
5388
5389 return;
5390 }
5391
5392
5393
5394
5395 /* @funcstatic edialign_wgt_type_count **************************************
5396 **
5397 ** edialign_wgt_type_count
5398 **
5399 ** @param [r] num [ajint] Undocumented
5400 ** @param [r] e_len [ajint] Undocumented
5401 ** @param [u] plus_cnt [ajint*] Undocumented
5402 ** @param [u] minus_cnt [ajint*] Undocumented
5403 ** @param [u] nuc_cnt [ajint*] Undocumented
5404 ** @param [u] frg_inv [ajint*] Undocumented
5405 ** @param [u] dia [struct multi_frag*] Undocumented
5406 ** @return [void]
5407 *****************************************************************************/
5408
edialign_wgt_type_count(ajint num,ajint e_len,ajint * plus_cnt,ajint * minus_cnt,ajint * nuc_cnt,ajint * frg_inv,struct multi_frag * dia)5409 static void edialign_wgt_type_count(ajint num , ajint e_len, ajint *plus_cnt,
5410 ajint *minus_cnt, ajint *nuc_cnt ,
5411 ajint *frg_inv, struct multi_frag *dia)
5412 {
5413 ajint i;
5414 ajint dc;
5415 ajint pc;
5416 ajint s1;
5417 ajint pos;
5418
5419 (void) e_len; /* make it used */
5420
5421 for( dc = 0 ; dc < num ; dc++ )
5422 {
5423
5424 for( pc = 0 ; pc < dia->ext ; pc++ )
5425 {
5426 i = dia->b[0] + pc;
5427 s1 = dia->s[0];
5428 pos = shift[s1][i];
5429 if ( dia->trans )
5430 if ( dia->cs )
5431 minus_cnt[ pos ] = minus_cnt[ pos ] + 1 ;
5432 else
5433 plus_cnt[ pos ] = plus_cnt[ pos ] + 1 ;
5434 else {
5435 nuc_cnt[ pos ] = nuc_cnt[ pos ] + 1 ;
5436 }
5437 frg_inv[ pos ] = frg_inv[ pos ] + 1 ;
5438 }
5439 dia = dia->next;
5440 }
5441
5442 return;
5443 }
5444
5445
5446
5447
5448 /* @funcstatic edialign_plot_calc **************************************
5449 **
5450 ** edialign_plot_calc
5451 **
5452 ** @param [r] num [ajint] Undocumented
5453 ** @param [r] e_len [ajint] Undocumented
5454 ** @param [u] w_count [float*] Undocumented
5455 ** @param [u] pl [float*] Undocumented
5456 ** @param [u] dia [struct multi_frag*] Undocumented
5457 ** @param [u] fp_csc [FILE*] Undocumented
5458 ** @return [void]
5459 *****************************************************************************/
5460
edialign_plot_calc(ajint num,ajint e_len,float * w_count,float * pl,struct multi_frag * dia,FILE * fp_csc)5461 static void edialign_plot_calc(ajint num , ajint e_len, float *w_count,
5462 float *pl, struct multi_frag *dia ,
5463 FILE *fp_csc )
5464 {
5465 ajint i;
5466 ajint dc;
5467 ajint pc;
5468 ajint s1;
5469 ajint pos;
5470 float max_weight = 0; /* maximum value of `weight_count' */
5471 float shrink;
5472 float shrink_csc;
5473 float hsc;
5474
5475 for( dc = 0 ; dc < num ; dc++ )
5476 {
5477
5478 for( pc = 0 ; pc < dia->ext ; pc++ )
5479 {
5480 i = dia->b[0] + pc;
5481 s1 = dia->s[0];
5482 pos = shift[s1][i];
5483 w_count[ pos ] = w_count[ pos ] + dia->weight;
5484 }
5485 dia = dia->next;
5486 }
5487
5488
5489 for( i = 0 ; i <= e_len ; i++ )
5490 if( max_weight < w_count[i] )
5491 max_weight = w_count[i];
5492
5493
5494 if( max_weight )
5495 {
5496 shrink = plot_num / max_weight;
5497 shrink_csc = MAX_CSC / max_weight;
5498
5499 for( i = 0 ; i <= e_len ; i++ )
5500 pl[i] = w_count[i] * shrink;
5501
5502 if( col_score )
5503 {
5504 printf(" e_len = %d \n\n", e_len) ;
5505 for( i = 0 ; i <= e_len ; i++ )
5506 {
5507 hsc = w_count[i] * shrink_csc ;
5508 fprintf( fp_csc , "%5.1f\t0\n", hsc ) ;
5509 }
5510 }
5511 }
5512 else
5513 {
5514 for( i = 0 ; i <= e_len ; i++ )
5515 pl[i] = 0 ;
5516
5517 printf(" e_len = %d \n\n", e_len) ;
5518 printf(" no max weight\n\n");
5519 }
5520
5521 return;
5522 }
5523
5524
5525
5526
5527 /* @funcstatic edialign_av_tree_print **************************************
5528 **
5529 ** edialign_av_tree_print
5530 **
5531 ** @return [void]
5532 *****************************************************************************/
5533
edialign_av_tree_print(void)5534 static void edialign_av_tree_print(void)
5535 {
5536 ajuint i;
5537 ajuint j;
5538 ajint k;
5539 ajuint tconnect;
5540 ajint max_pair[2] = {0,0};
5541 ajuint m1;
5542 ajuint m2;
5543 struct subtree *all_clades = NULL;
5544 double **clade_similarity = NULL;
5545 double new_similarity = 0.;
5546 double max_sim;
5547 char *string = NULL;
5548 char l_name[2][20];
5549 float branch_len[2];
5550 float depth;
5551
5552 if( (all_clades = (struct subtree *)
5553 calloc( seqnum , sizeof( struct subtree ) )) == NULL)
5554 {
5555 ajFatal(" problems with memory allocation for `all_clades'\n \n");
5556 }
5557
5558
5559 if( (clade_similarity = (double **)
5560 calloc( seqnum , sizeof( double* ) )) == NULL)
5561 embExitBad();
5562
5563 for(i = 0 ; i < seqnum ; i++ )
5564 if( (clade_similarity[i] = (double *)
5565 calloc( seqnum , sizeof( double ) )) == NULL)
5566 embExitBad();
5567
5568 if( (string = (char *)
5569 calloc( seqnum * 100 , sizeof(char) )) == NULL)
5570 {
5571 printf(" problems with memory allocation for `string'\n \n");
5572 embExitBad();
5573 }
5574
5575
5576
5577
5578 for(i = 0 ; i < seqnum ; i++ )
5579 {
5580 if( (all_clades[i].member = (int *)
5581 calloc( seqnum , sizeof( int ) )) == NULL)
5582 {
5583 printf(" problems with memory allocation for `all_clades'\n \n");
5584 embExitBad();
5585 }
5586
5587
5588 if( (all_clades[i].name = (char *)
5589 calloc( seqnum * 100 , sizeof( char ) )) == NULL)
5590 {
5591 printf(" problems with memory allocation for `all_clades'\n \n");
5592 embExitBad();
5593 }
5594
5595 strcpy( all_clades[i].name , seq_name[i] );
5596 all_clades[i].member_num = 1;
5597 all_clades[i].member[0] = i;
5598 all_clades[i].valid = 1;
5599 all_clades[i].depth = 0;
5600 }
5601
5602
5603
5604 for(i = 0 ; i < seqnum ; i++ )
5605 for(j = i + 1 ; j < seqnum ; j++ )
5606 {
5607 clade_similarity[i][j] = glob_sim[i][j];
5608 clade_similarity[j][i] = glob_sim[i][j];
5609 }
5610
5611
5612 for(tconnect = 1 ; tconnect < seqnum ; tconnect++)
5613 {
5614 max_sim = - 1;
5615
5616
5617
5618 for(i = 0 ; i < seqnum ; i++ )
5619 for(j = 0 ; j < seqnum ; j++ )
5620 if( i != j )
5621 if( all_clades[i].valid && all_clades[j].valid )
5622 if( clade_similarity[i][j] > max_sim )
5623 {
5624 max_sim = clade_similarity[i][j];
5625 max_pair[0] = i;
5626 max_pair[1] = j;
5627 }
5628
5629
5630 depth = (float) (1 / ( max_sim + 1 ));
5631
5632 {
5633 m1 = max_pair[0];
5634 m2 = max_pair[1];
5635
5636 for( i = 0 ; i < seqnum ; i++ )
5637 if( all_clades[i].valid )
5638 if( i != m1 )
5639 if( i != m2 )
5640 {
5641 if( ! strcmp(clust_sim , "av") )
5642 new_similarity =
5643 (
5644 clade_similarity[i][m1] *
5645 all_clades[m1].member_num +
5646 clade_similarity[i][m2] *
5647 all_clades[m2].member_num
5648 ) /
5649 ( all_clades[m1].member_num +
5650 all_clades[m2].member_num );
5651
5652 if( ! strcmp(clust_sim , "max") )
5653 new_similarity =
5654 edialign_maxf2(
5655 (float) clade_similarity[i][m1] ,
5656 (float) clade_similarity[i][m2] );
5657
5658 if( ! strcmp(clust_sim , "min") )
5659 new_similarity =
5660 edialign_minf2(
5661 (float) clade_similarity[i][m1],
5662 (float) clade_similarity[i][m2]);
5663
5664
5665 clade_similarity[i][m1] = new_similarity;
5666 clade_similarity[m1][i] = new_similarity;
5667 }
5668
5669
5670 all_clades[m2].valid = 0;
5671
5672 for(k = 0 ; k < all_clades[m2].member_num ; k++)
5673 all_clades[m1].member[ all_clades[m1].member_num + k ] =
5674 all_clades[m2].member[ k ] ;
5675
5676 all_clades[m1].member_num =
5677 all_clades[m1].member_num + all_clades[m2].member_num;
5678
5679
5680 for(k = 0 ; k < 2 ; k++)
5681 {
5682 branch_len[k] = depth - all_clades[ max_pair[k] ].depth;
5683 sprintf( l_name[k],":%f", branch_len[k]);
5684 }
5685
5686
5687 all_clades[m1].depth = depth;
5688
5689
5690 strcpy(string,"(");
5691 strcat(string, all_clades[m1].name);
5692 strcat(string,l_name[0]);
5693 /* strcat(string,",\n"); */
5694 strcat(string, all_clades[m2].name);
5695 strcat(string,l_name[1]);
5696 strcat(string,")");
5697
5698 strcpy( all_clades[m1].name , string );
5699 }
5700 }
5701
5702
5703 strcat(string, ";");
5704
5705 i = strlen( string ) + 2;
5706
5707 if( (upg_str = (char *) calloc( i , sizeof(char) )) == NULL)
5708 {
5709 printf(" problems with memory allocation for `upg_str'\n \n");
5710 embExitBad();
5711 }
5712
5713 for(i = 0 ; i <= strlen( string ) ; i++ )
5714 upg_str[i] = string[i] ;
5715
5716 for(i=0; i < seqnum; i++)
5717 {
5718 AJFREE(all_clades[i].member);
5719 AJFREE(all_clades[i].name);
5720 AJFREE(clade_similarity[i]);
5721 }
5722 AJFREE(all_clades);
5723 AJFREE(clade_similarity);
5724
5725 AJFREE(string);
5726 return;
5727 }
5728
5729
5730
5731
5732 /* @funcstatic edialign_print_log **************************************
5733 **
5734 ** edialign_print_log
5735 **
5736 ** @param [u] d [struct multi_frag*] Undocumented
5737 ** @param [u] fp_l [FILE*] Undocumented
5738 ** @param [u] fp_fs [FILE*] Undocumented
5739 ** @return [void]
5740 *****************************************************************************/
5741
edialign_print_log(struct multi_frag * d,FILE * fp_l,FILE * fp_fs)5742 static void edialign_print_log(struct multi_frag *d,FILE *fp_l,FILE *fp_fs)
5743 {
5744 ajuint i;
5745 ajuint j;
5746 ajint pv;
5747 ajint percent;
5748 ajint this_frag_trans;
5749 ajint frg_count = 0;
5750 struct multi_frag *diagonal;
5751 char hc;
5752
5753 if(long_output)
5754 {
5755 fprintf(fp_l," \n \n Iteration %d:\n", istep );
5756
5757 if( istep < 10 )
5758 fprintf(fp_l," ------------");
5759 else
5760 fprintf(fp_l," -------------");
5761 }
5762
5763
5764 for(i= 0 ; i<seqnum ; i++)
5765 for(j= i+1 ; j<seqnum; j++)
5766 {
5767 if(long_output) {
5768 if( seqnum > 2 ) {
5769 fprintf(fp_l, "\n \n \n \n Pairwise alignment ");
5770 fprintf(fp_l, "%u/%u", i + 1, j + 1);
5771 fprintf(fp_l, " (%s / %s) \n" ,seq_name[i],seq_name[j] );
5772 fprintf(fp_l, " =========================");
5773 fprintf(fp_l, "===================== ");
5774 }
5775 fprintf(fp_l, " \n \n \n");
5776 }
5777
5778 pairalignsum = 0;
5779 pairalignlen = 0;
5780
5781 diagonal = d;
5782 while(diagonal != NULL)
5783 {
5784 frg_count++ ;
5785 if( diagonal->s[0] == (ajint)i && diagonal->s[1] == (ajint)j)
5786 {
5787 if(diagonal->sel)
5788 {
5789 if(long_output)
5790 {
5791 fprintf(fp_l," *");
5792 fprintf(fp_l," (%3d,", diagonal->b[0]);
5793 }
5794
5795 pairalignsum = pairalignsum + diagonal->weight;
5796 pairalignlen = pairalignlen + diagonal->ext;
5797 }
5798 else
5799 if(long_output)
5800 fprintf(fp_l," (%3d,", diagonal->b[0]);
5801
5802 if(long_output)
5803 {
5804 fprintf(fp_l,"%3d) ", diagonal->b[1]);
5805 fprintf(fp_l," wgt:%7.3f ", diagonal->weight);
5806 if(seqnum > 2)
5807 if(overlap_weights)
5808 fprintf(fp_l," olw:%7.3f ", diagonal->ow);
5809 fprintf(fp_l,"len: %2d", diagonal->ext);
5810 if( ( wgt_type == 3 ) || crick_strand )
5811 {
5812 if( diagonal->trans )
5813 fprintf(fp_l," P-frg" );
5814 else
5815 fprintf(fp_l," N-frg" );
5816 }
5817
5818 if( diagonal->trans )
5819 if( crick_strand )
5820 {
5821 if( diagonal->cs )
5822 fprintf(fp_l,", CRICK strand " );
5823 else
5824 fprintf(fp_l,", WATSON strand " );
5825 }
5826
5827 }
5828
5829 if( frg_mult_file_v )
5830 {
5831 fprintf(fp_fs,"FRG %d ", frg_count );
5832 fprintf(fp_fs,"name: %s %s ",
5833 seq_name[i] , seq_name[j] ) ;
5834
5835 fprintf(fp_fs,"seq: %u %u ", i + 1 , j + 1 ) ;
5836 fprintf(fp_fs,"beg: %d %d ", diagonal->b[0],
5837 diagonal->b[1]);
5838 fprintf(fp_fs,"len: %d ", diagonal->ext);
5839
5840 fprintf(fp_fs,"wgt:%7.3f ", diagonal->weight);
5841 if(diagonal->sel)
5842 fprintf(fp_fs," CONS ");
5843 else
5844 fprintf(fp_fs," NON-CONS ");
5845 fprintf(fp_fs,"\n") ;
5846 fprintf(fp_fs,"SEG1 ");
5847 for(pv = 0 ; pv < diagonal->ext ; pv ++)
5848 fprintf(fp_fs,"%c", seq[i][diagonal->b[0] + pv]);
5849 fprintf(fp_fs,"\n");
5850
5851 fprintf(fp_fs,"SEG2 ");
5852 for(pv = 0 ; pv < diagonal->ext ; pv ++)
5853 fprintf(fp_fs,"%c", seq[j][diagonal->b[1] + pv]);
5854 fprintf(fp_fs,"\n");
5855 fprintf(fp_fs,"\n");
5856 }
5857
5858 if( frg_mult_file & ! frg_mult_file_v )
5859 {
5860 if( diagonal->sel )
5861 {
5862 fprintf(fp_fs," %u %u ", i + 1 , j + 1 ) ;
5863 fprintf(fp_fs," %d %d ", diagonal->b[0],
5864 diagonal->b[1]);
5865 fprintf(fp_fs," %d \n", diagonal->ext);
5866 }
5867 }
5868
5869 if(long_output)
5870 {
5871 fprintf(fp_l,"\n");
5872
5873 if(
5874 wgt_type == 2 ||
5875 ( ( wgt_type == 3 ) && diagonal->trans )
5876 )
5877 this_frag_trans = 1;
5878 else
5879 this_frag_trans = 0;
5880
5881 if( this_frag_trans )
5882 {
5883 fprintf(fp_l,"\n ");
5884 for(pv = 0 ; pv < diagonal->ext ; pv ++)
5885 {
5886 hc = amino_acid[ amino[i][ diagonal->b[0] +
5887 pv - 1 ] ] ;
5888 if( crick_strand )
5889 if( diagonal->cs )
5890 hc = amino_acid[amino_c[i]
5891 [diagonal->b[0] +
5892 pv - 1 ] ] ;
5893
5894 if( ( pv % 3 ) == 0 )
5895 fprintf(fp_l,"/");
5896 if( ( pv % 3 ) == 1 )
5897 fprintf(fp_l,"%c", hc ) ;
5898 if( ( pv % 3 ) == 2 )
5899 fprintf(fp_l,"\\");
5900
5901 }
5902 }
5903
5904 fprintf(fp_l,"\n ");
5905 for(pv = 0 ; pv < diagonal->ext ; pv ++)
5906 fprintf(fp_l,"%c", seq[i][ diagonal->b[0] + pv ] );
5907 fprintf(fp_l,"\n");
5908
5909
5910 fprintf(fp_l," ");
5911 for(pv = 0 ; pv < diagonal->ext ; pv ++)
5912 fprintf(fp_l,"%c", seq[j][ diagonal->b[1] + pv ] );
5913
5914
5915 if( this_frag_trans )
5916 {
5917 fprintf(fp_l,"\n ");
5918 for(pv = 0 ; pv < diagonal->ext ; pv ++)
5919 {
5920 hc = amino_acid[ amino[j][ diagonal->b[1] +
5921 pv - 1 ]
5922 ] ;
5923 if( crick_strand )
5924 if( diagonal->cs )
5925 hc = amino_acid[ amino_c[j]
5926 [ diagonal->b[1] +
5927 pv - 1 ] ] ;
5928
5929 if( ( pv % 3 ) == 0 )
5930 fprintf(fp_l,"\\");
5931 if( ( pv % 3 ) == 1 )
5932 fprintf(fp_l,"%c", hc ) ;
5933 if( ( pv % 3 ) == 2 )
5934 fprintf(fp_l,"/");
5935
5936 }
5937 }
5938
5939 fprintf(fp_l,"\n \n");
5940 }
5941 } /* if( diagonal->s[0] == i && diagonal->s[1] == j) */
5942
5943 diagonal = diagonal->next;
5944
5945 } /* while(diagonal != NULL) */
5946
5947 percent = pairalignlen*100/edialign_mini2(seqlen[i],seqlen[j]);
5948
5949 if(long_output)
5950 {
5951 fprintf(fp_l,"\n Sum of diagonal scores: %f\n",
5952 pairalignsum);
5953 fprintf(fp_l," Aligned residues: %d\n", pairalignlen);
5954 fprintf(fp_l," (%d percent of the shorter", percent);
5955 fprintf(fp_l," sequence aligned)\n");
5956 }
5957 } /* for(i = 0 ; i < seqnum ; i++)
5958 for(j = i + 1 ; j < seqnum ; j++) */
5959
5960 return;
5961 }
5962
5963
5964
5965
/* @funcstatic edialign_word_count ******************************************
**
** Counts the words in a string. A word is a maximal run of characters
** other than blank and tab. The last character of the string is not
** examined (for a line read with fgets this is the newline). An empty
** string contains no words.
**
** @param [r] str [char*] String to examine, may be empty
** @return [ajint] Number of words found
*****************************************************************************/

static ajint edialign_word_count(char *str)
{
    size_t len;
    size_t i;
    ajint in_word = 0;
    ajint nwords = 0;

    if(str == NULL)
        return 0;

    /* Evaluate the length once. For an empty string "length - 1" would
       wrap around to a huge unsigned value, so handle it up front. */
    len = strlen(str);
    if(len == 0)
        return 0;

    for(i = 0; i < len - 1; i++)
    {
        if((str[i] != ' ') && (str[i] != '\t'))
        {
            if(!in_word)
            {
                nwords++;
                in_word = 1;
            }
        }
        else
            in_word = 0;
    }

    return nwords;
}
5998
5999
6000
6001
6002 /* @funcstatic edialign_exclude_frg_read *************************************
6003 **
6004 ** edialign_exclude_frg_read
6005 **
6006 ** @param [u] file_name [char*] Undocumented
6007 ** @param [u] exclude_list [int***] Undocumented
6008 ** @return [void]
6009 *****************************************************************************/
6010
edialign_exclude_frg_read(char * file_name,int *** exclude_list)6011 static void edialign_exclude_frg_read( char *file_name , int ***exclude_list)
6012 {
6013 char exclude_file_name[ NAME_LEN ];
6014 FILE *fp;
6015 char line[ 10000 ];
6016 ajint i;
6017 ajint len;
6018 ajint beg1;
6019 ajint beg2;
6020 ajuint seq1;
6021 ajuint seq2;
6022
6023 strcpy( exclude_file_name , file_name );
6024 strcat( exclude_file_name , ".xfr" );
6025
6026 if( (fp = fopen( exclude_file_name, "r")) == NULL)
6027 edialign_erreur("\n\n cannot find file with excluded fragments \n\n");
6028
6029
6030
6031 while( fgets( line , MLINE , fp ) != NULL )
6032 {
6033 if( strlen( line ) > 4 )
6034 {
6035 sscanf(line,"%u %u %d %d %d", &seq1, &seq2, &beg1, &beg2 , &len);
6036
6037 if( seq1 > seqnum )
6038 {
6039 printf ("\n\n exclueded fragment makes no sense!\n\n");
6040 printf (" wrong sequence no %u in fragment\n\n", seq1 );
6041 printf ("%u %u %d %d %d \n\n ", seq1, seq2, beg1, beg2 , len);
6042 embExitBad() ;
6043 }
6044
6045 if( seq2 > seqnum )
6046 {
6047 printf ("\n\n excluded fragment makes no sense!\n\n");
6048 printf (" wrong sequence no %u in fragment\n\n", seq2 );
6049 printf (" %u %u %d %d %d \n\n", seq1, seq2, beg1, beg2,
6050 len );
6051 embExitBad() ;
6052 }
6053
6054 /*
6055 seq1 = seq1 - 1;
6056 seq2 = seq2 - 1;
6057 */
6058
6059 if( beg1 + len > seqlen[ seq1 - 1 ] + 1 ){
6060 printf ("\n\n excluded fragment makes no sense!\n");
6061 printf (" fragment");
6062 printf (" \" %d %d %d %d %d \"\n", seq1, seq2, beg1,
6063 beg2 , len );
6064 printf (" doesn't fit into sequence %u:\n", seq1 );
6065 printf (" sequence %u has length = %d\n\n", seq1 ,
6066 seqlen[ seq1 - 1 ] );
6067 embExitBad() ;
6068 }
6069
6070
6071
6072 for( i = 0 ; i < len ; i++ )
6073 {
6074 exclude_list[ seq1 - 1 ][ seq2 - 1 ][ beg1 + i ] = beg2 + i ;
6075 }
6076 }
6077 }
6078
6079 return;
6080 }
6081
6082
6083
6084
6085 #if 0
/* @funcstatic edialign_ws_remove *************************************
**
** Removes leading blanks and tabs from a string in place.
**
** @param [u] str [char*] NUL-terminated string to be modified
** @return [void]
*****************************************************************************/

static void edialign_ws_remove( char *str )
{
    size_t skip = 0;

    while( ( str[ skip ] == ' ' ) || ( str[ skip ] == '\t' ) )
        skip++;

    /* Source and destination overlap, which strcpy does not allow;
       memmove does. The terminating NUL is moved as well. */
    if( skip > 0 )
        memmove( str , str + skip , strlen( str + skip ) + 1 );

    return;
}
6106
6107
6108
6109
/* @funcstatic edialign_n_clean *************************************
**
** Reduces a string to its first word, in place: leading blanks, tabs
** and '>' characters are removed and the string is cut at the first
** blank, tab or newline that follows.
**
** @param [u] str [char*] NUL-terminated string to be modified
** @return [void]
*****************************************************************************/

static void edialign_n_clean( char *str )
{
    size_t skip = 0;

    while( ( str[ skip ] == ' ' ) ||
           ( str[ skip ] == '\t' ) ||
           ( str[ skip ] == '>' ) )
        skip++;

    /* Source and destination overlap, which strcpy does not allow;
       memmove does. The terminating NUL is moved as well. */
    if( skip > 0 )
        memmove( str , str + skip , strlen( str + skip ) + 1 );

    /* cut at the first blank, tab or newline, if there is one */
    str[ strcspn( str , " \t\n" ) ] = '\0';

    return;
}
6139
6140
6141
6142
6143 /* @funcstatic edialign_fasta_test *************************************
6144 **
6145 ** edialign_fasta_test
6146 **
6147 ** @param [u] seq_file [char*] Undocumented
6148 ** @return [void]
6149 *****************************************************************************/
6150
6151 static void edialign_fasta_test( char *seq_file )
6152 {
6153 ajint test = 1;
6154 FILE *fp;
6155
6156 char line[ MAX_INPUT_LINE ] ;
6157
6158 if( (fp = fopen( seq_file , "r")) == NULL)
6159 {
6160 printf("\n\n Cannot find sequence file %s \n\n\n", seq_file );
6161 embExitBad() ;
6162 }
6163
6164 while( test )
6165 {
6166 fgets( line , MAX_INPUT_LINE , fp );
6167
6168 edialign_ws_remove( line );
6169
6170 if( line[0] != '\n' )
6171 {
6172 if( line[0] == '>' )
6173 test = 0;
6174 else
6175 edialign_erreur("\n\n file not in FASTA format \n\n");
6176 }
6177 }
6178
6179 fclose( fp );
6180
6181 return;
6182 }
6183 #endif
6184
6185
6186
6187
6188 #if 0
6189 /* @funcstatic edialign_seq_read ********************************************
6190 **
6191 ** edialign_seq_read. unused.
6192 **
6193 ** @param [u] seq_file [char*] Undocumented
6194 ** @param [u] sq [char* [MAX_SEQNUM]] Undocumented
6195 ** @param [u] sqn [char**] Undocumented
6196 ** @param [u] fsqn [char**] Undocumented
6197 ** @return [ajint] Undocumented
6198 *****************************************************************************/
6199
6200 static ajint edialign_seq_read(char *seq_file, char *sq[MAX_SEQNUM] ,
6201 char **sqn , char **fsqn)
6202 {
6203 char line[ MAX_INPUT_LINE ] ;
6204
6205 ajint sn, i, k , crc ;
6206 ajint j = 0;
6207 FILE *fp;
6208 ajint max_char[ MAX_SEQNUM ] ;
6209
6210 if( (fp = fopen( seq_file , "r")) == NULL)
6211 {
6212 printf("\n\n Cannot find sequence file %s \n\n\n", seq_file );
6213 embExitBad() ;
6214 }
6215 edialign_fasta_test( seq_file );
6216
6217 sn = -1 ;
6218 while( fgets( line , MAX_INPUT_LINE , fp ) != NULL )
6219 {
6220 edialign_ws_remove( line );
6221
6222 if( line[0] == '>' )
6223 {
6224 sn++;
6225
6226
6227 edialign_n_clean( line );
6228
6229
6230 fsqn[ sn ] = ( char * ) calloc(strlen(line) + 3 , sizeof(char));
6231
6232 strcpy( fsqn[ sn ] , line ) ;
6233
6234
6235 max_char[ sn ] = 0;
6236 sqn[ sn ] = (char *) calloc( SEQ_NAME_LEN + 3 , sizeof(char));
6237
6238 for( crc = 0 ; crc < SEQ_NAME_LEN ; crc++ )
6239 if( crc < strlen(line) )
6240 sqn[ sn ][ crc ] = line[ crc ] ;
6241 else
6242 sqn[ sn ][ crc ] = ' ';
6243
6244 sqn[ sn ][ SEQ_NAME_LEN ] = '\0';
6245
6246
6247
6248 }
6249
6250
6251 else
6252 max_char[ sn ] = max_char[ sn ] + strlen( line ) - 1 ;
6253 }
6254
6255 for( i = 0 ; i <= sn ; i++ )
6256 {
6257 sq[ i ] = ( char * ) calloc( max_char[ i ] + 1 , sizeof ( char ) );
6258 }
6259
6260 if( (seqlen = (int *) calloc( ( sn + 1 ) , sizeof(int) )) == NULL)
6261 edialign_erreur("\n problems with memory allocation for `seqlen' \n");
6262
6263
6264 fclose( fp );
6265
6266
6267 /******************************************/
6268
6269 if( self_comparison == 1 )
6270 {
6271 if( sn != 0 ) {
6272 printf("\n\n With option \"self comparison\" input file "
6273 "must contain one single sequence \n\n" );
6274 embExitBad() ;
6275 }
6276
6277 sq[ 1 ] = ( char * ) calloc( max_char[ 0 ] + 1 , sizeof ( char ) );
6278
6279 sqn[ 1 ] = ( char * ) calloc( strlen( line ) + 3 , sizeof ( char ) );
6280 strcpy( sqn[ 1 ] , sqn[ 0 ] ) ;
6281 }
6282
6283 /******************************************/
6284
6285
6286 if( (fp = fopen( seq_file , "r")) == NULL)
6287 edialign_erreur("\n\n no seq file \n\n");
6288
6289 sn = -1 ;
6290 while( fgets( line , MAX_INPUT_LINE , fp ) != NULL )
6291 {
6292 edialign_ws_remove( line );
6293 if( line[0] == '>' )
6294 {
6295 sn++;
6296 j = 0;
6297 }
6298 else
6299 for( k = 0 ; k < strlen( line ) ; k++ )
6300 if(
6301 (( line[ k ] >= 65 ) && ( line[ k ] <= 90 )) ||
6302 (( line[ k ] >= 97 ) && ( line[ k ] <= 122 ))
6303 )
6304 sq[ sn ][ j++ ] = toupper( line[ k ] ) ;
6305 }
6306
6307 sn++;
6308
6309 for( i = 0 ; i < sn ; i++ )
6310 {
6311 seqlen[ i ] = strlen ( sq[ i ] ) ;
6312 }
6313
6314 if( self_comparison )
6315 {
6316 seqlen[ 1 ] = seqlen[ 0 ] ;
6317 for( i = 0 ; i <= seqlen[ 0 ] ; i++ )
6318 sq[ 1 ][ i ] = sq[ 0 ][ i ] ;
6319 sn++;
6320 }
6321
6322 fclose( fp );
6323
6324 return( sn );
6325 }
6326 #endif
6327
6328
6329
6330
6331 /* @funcstatic edialign_matrix_read *****************************************
6332 **
6333 ** edialign_matrix_read
6334 **
6335 ** @param [u] fp_mat [FILE*] Undocumented
6336 ** @return [void]
6337 *****************************************************************************/
6338
edialign_matrix_read(FILE * fp_mat)6339 static void edialign_matrix_read( FILE *fp_mat )
6340 {
6341 ajint i, j;
6342 char line[MLINE], dummy[MLINE];
6343
6344 /* Ubuntu warns if fgets return is not tested */
6345 if(!fgets( line , MLINE , fp_mat ))
6346 {
6347 }
6348
6349 if(!fgets( line , MLINE , fp_mat ))
6350 {
6351 }
6352
6353
6354 for( i = 1 ; i <= 20 ; i++ )
6355 {
6356 for(j=i;j<=20;j++)
6357 {
6358 /* Ubuntu warns if fscanf return is not tested */
6359 if(fscanf( fp_mat , "%d" , &sim_score[i][j]) != 1)
6360 {
6361 }
6362
6363 sim_score[j][i] = sim_score[i][j];
6364 if ( sim_score[i][j] > max_sim_score )
6365 max_sim_score = sim_score[i][j] ;
6366 }
6367
6368 /* Ubuntu warns if fscanf return is not tested */
6369 if(fscanf( fp_mat, "%s\n", dummy) != 1)
6370 {
6371 }
6372
6373 }
6374
6375
6376 /* fclose(fp_mat); */
6377
6378 for( i = 0 ; i <= 20 ; i++ )
6379 {
6380 sim_score[i][0] = 0 ;
6381 sim_score[0][i] = 0 ;
6382 }
6383
6384 /*
6385 sim_score[0][0] = max_sim_score ;
6386 */
6387 return;
6388 }
6389
6390
6391
6392
6393 /* @funcstatic edialign_tp400_read *****************************************
6394 **
6395 ** edialign_tp400_read
6396 **
6397 ** @param [r] w_type [ajint] Undocumented
6398 ** @param [u] pr_ptr [double**] Undocumented
6399 ** @return [void]
6400 *****************************************************************************/
6401
edialign_tp400_read(ajint w_type,double ** pr_ptr)6402 static void edialign_tp400_read( ajint w_type , double **pr_ptr )
6403 {
6404
6405 /* reads probabilities from file */
6406 /* w_type = 0 (protein), 1 (dna w/o transl.), 2 (dna with transl.) */
6407
6408 char line[MLINE];
6409 /*
6410 char file_name[MLINE];
6411 char suffix[10];
6412 */
6413 char str[MLINE] ;
6414 ajint sum, len;
6415 double pr;
6416 AjPFile etpfile = NULL;
6417 AjPStr tnstr = NULL;
6418
6419 FILE *fp;
6420
6421
6422 tnstr = ajStrNew();
6423
6424
6425 if ( w_type == 0 )
6426 {
6427 etpfile = ajDatafileNewInNameC("tp400_prot");
6428 /* strcpy( suffix , "prot" );*/
6429 }
6430
6431 else if ( w_type == 1 )
6432 {
6433 etpfile = ajDatafileNewInNameC("tp400_dna");
6434 /* strcpy( suffix , "dna" );*/
6435 }
6436
6437 else if ( w_type == 2 )
6438 {
6439 etpfile = ajDatafileNewInNameC("tp400_trans");
6440 /* strcpy( suffix , "trans" );*/
6441 }
6442
6443 /*
6444 strcpy( file_name , par_dir );
6445 strcat( file_name , "/tp400_" );
6446 strcat( file_name , suffix );
6447
6448
6449 if ( ( fp = fopen( file_name , "r" ) ) == NULL )
6450 {
6451 printf("\n\n Cannot find the file %s \n\n", file_name );
6452 printf(" Make sure the environment variable DIALIGN2_DIR points\n");
6453 printf(" to a directory containing the files \n\n");
6454 printf(" BLOSUM \n tp400_dna\n tp400_prot \n tp400_trans "
6455 "\n\n" );
6456 printf(" These files should be contained in the DIALIGN package "
6457 "\n\n\n" ) ;
6458 embExitBad() ;
6459 }
6460 */
6461
6462
6463 /*
6464 ajStrAssignS(&tnstr,ajFileGetPrintnameS(etpfile));
6465 ajFileClose(&etpfile);
6466 fp = fopen(ajStrGetPtr(tnstr),"rb");
6467 */
6468 fp = ajFileGetFileptr(etpfile);
6469
6470 if ( fgets( line , MLINE , fp ) == NULL )
6471 ajFatal("\n\n problem with tp400 file \n\n");
6472 else
6473 if( w_type % 2 )
6474 av_sim_score_nuc = (float) atof( line );
6475 else
6476 av_sim_score_pep = (float) atof( line );
6477
6478
6479 while( fgets( line , MLINE , fp ) != NULL )
6480 {
6481 sscanf(line,"%d %d %s", &len, &sum, str );
6482
6483 pr = atof(str);
6484 pr_ptr[len][sum] = pr;
6485
6486 }
6487
6488
6489
6490 ajStrDel(&tnstr);
6491 /* fclose(fp); */
6492 ajFileClose(&etpfile);
6493
6494
6495 return;
6496 }
6497
6498
6499
6500
6501 /* @funcstatic edialign_subst_mat *****************************************
6502 **
6503 ** edialign_subst_mat
6504 **
6505 ** @param [u] file_name [char*] Undocumented
6506 ** @param [r] fragno [int] Undocumented
6507 ** @param [u] frg [struct multi_frag*] Undocumented
6508 ** @return [void]
6509 *****************************************************************************/
6510
edialign_subst_mat(char * file_name,int fragno,struct multi_frag * frg)6511 static void edialign_subst_mat( char *file_name, int fragno ,
6512 struct multi_frag *frg )
6513 {
6514 ajint ii;
6515 ajuint i;
6516 ajuint j;
6517 ajuint s0, s1;
6518 ajint frg_count ;
6519 short a0 , a1 ;
6520 ajint ****sbsmt ;
6521 struct multi_frag *frag ;
6522 char mat_file_name[ NAME_LEN ] ;
6523 FILE *fp_mat;
6524
6525
6526 if( ( sbsmt = (int **** ) calloc( seqnum , sizeof(int ***))) == NULL)
6527 {
6528 printf("Problems with memory allocation for sbsmt\n");
6529 embExitBad();
6530 }
6531
6532 for( i = 0 ; i < seqnum ; i++ )
6533 if( ( sbsmt[i] = (int *** ) calloc( seqnum , sizeof(int **))) == NULL)
6534 {
6535 printf("Problems with memory allocation for sbsmt\n");
6536 embExitBad();
6537 }
6538
6539 for( i = 0 ; i < seqnum ; i++ )
6540 for( j = 0 ; j < seqnum ; j++ )
6541 if((sbsmt[i][j] = (int ** ) calloc(21,sizeof(int*))) == NULL)
6542 {
6543 printf("Problems with memory allocation for sbsmt\n");
6544 embExitBad();
6545 }
6546
6547 for( i = 0 ; i < seqnum ; i++ )
6548 for( j = 0 ; j < seqnum ; j++ )
6549 for( a0 = 0 ; a0 < 21 ; a0++ )
6550 if((sbsmt[i][j][a0] = (int *) calloc(21,sizeof(int))) == NULL)
6551 {
6552 printf("Problems with memory allocation for sbsmt\n");
6553 embExitBad();
6554 }
6555
6556 for( i = 0 ; i <seqnum ; i++ )
6557 for( j = 0 ; j <seqnum ; j++ )
6558 for( a0 = 0 ; a0 <= 20 ; a0++ )
6559 for( a1 = 0 ; a1 <= 20 ; a1++ )
6560 sbsmt[ i ][ j ][ a0 ][ a1 ] = 0 ;
6561
6562
6563 strcpy( mat_file_name , file_name );
6564 strcat( mat_file_name , ".mat" );
6565
6566 fp_mat = fopen( mat_file_name, "w") ;
6567
6568
6569
6570 frag = frg ;
6571
6572 for( frg_count = 0 ; frg_count < fragno ; frg_count++ )
6573 {
6574 if( frag->weight > sf_mat_thr )
6575 for( ii = 0 ; ii < frag->ext ; ii++ )
6576 {
6577 a0 = amino[ frag->s[0] ][ frag->b[0] + ii ] ;
6578 a1 = amino[ frag->s[1] ][ frag->b[1] + ii ] ;
6579 s0 = frag->s[0] ;
6580 s1 = frag->s[1] ;
6581 sbsmt[ s0 ][ s1 ][ a0 ][ a1 ]++ ;
6582 sbsmt[ s1 ][ s0 ][ a1 ][ a0 ]++ ;
6583
6584 }
6585 frag = frag->next ;
6586 }
6587
6588
6589 fprintf( fp_mat, "taxanumber: %u ;\n", seqnum) ;
6590 fprintf( fp_mat, "description: DIALIGN alignment ;\n" ) ;
6591 fprintf( fp_mat, "description: %s;\n", input_line ) ;
6592
6593
6594 for( i = 0 ; i < seqnum ; i++ )
6595 fprintf( fp_mat, "taxon: %.3u name: %s ;\n", i + 1 , full_name[i] ) ;
6596
6597
6598 for( s0 = 0 ; s0 < seqnum ; s0++ )
6599 for( s1 = s0 + 1 ; s1 < seqnum ; s1++ )
6600 for ( a0 = 1 ; a0 <= 20 ; a0++ )
6601 for( a1 = 1 ; a1 < 21 ; a1++ )
6602 {
6603 fprintf( fp_mat, "pair: %.3u %.3u ", s0 + 1, s1 + 1 );
6604 fprintf( fp_mat, " acids: %c%c ", amino_acid[a0] ,
6605 amino_acid[a1] );
6606 fprintf( fp_mat, " number: %d ;\n", sbsmt[s0][s1][a0][a1]);
6607 }
6608
6609 return;
6610 }
6611
6612
6613
6614
6615 /* @funcstatic edialign_print_fragments **************************************
6616 **
6617 ** edialign_print_fragments
6618 **
6619 ** @param [u] d [struct multi_frag*] Undocumented
6620 ** @param [u] fp_ff2 [FILE*] Undocumented
6621 ** @return [void]
6622 *****************************************************************************/
6623
edialign_print_fragments(struct multi_frag * d,FILE * fp_ff2)6624 static void edialign_print_fragments(struct multi_frag *d , FILE *fp_ff2 )
6625 {
6626 struct multi_frag *fragment ;
6627
6628 fragment = d;
6629 while( fragment != NULL )
6630 {
6631 if( fragment->it )
6632 {
6633 gl_frg_count++ ;
6634 fprintf( fp_ff2, "%6d) ", gl_frg_count );
6635 fprintf( fp_ff2, "seq: %3d %3d ", fragment->s[0] + 1 ,
6636 fragment->s[1] + 1 );
6637 fprintf( fp_ff2, "beg: %7d %7d ", fragment->b[0] ,
6638 fragment->b[1] );
6639 fprintf( fp_ff2, "len: %3d ", fragment->ext );
6640 fprintf( fp_ff2, "wgt: %6.2f ", fragment->weight );
6641 fprintf( fp_ff2, "olw: %6.2f ", fragment->ow );
6642
6643 fprintf( fp_ff2, "it: %d ", fragment->it );
6644 if( fragment->sel )
6645 fprintf( fp_ff2, "cons " );
6646 else
6647 fprintf( fp_ff2, "incons " );
6648
6649 if( ( wgt_type == 3 ) || crick_strand )
6650 {
6651 if( fragment->trans )
6652 fprintf( fp_ff2, " P-frg" );
6653 else
6654 fprintf( fp_ff2, " N-frg" );
6655 if( fragment->trans )
6656 if( crick_strand )
6657 {
6658 if( fragment->cs )
6659 fprintf( fp_ff2, " -" );
6660 else
6661 fprintf( fp_ff2, " +" );
6662 }
6663 }
6664
6665
6666 fprintf( fp_ff2, "\n" );
6667 }
6668 fragment = fragment->next ;
6669 }
6670
6671 return;
6672 }
6673
6674
6675
6676
6677 #if 0
6678 /* @funcstatic edialign_weight_print ****************************************
6679 **
6680 ** edialign_weight_print
6681 **
6682 ** @param [u] wgt [float**] Undocumented
6683 ** @return [void]
6684 *****************************************************************************/
6685
6686 static void edialign_weight_print( float **wgt )
6687 {
6688 ajint l, s ;
6689 FILE *fp;
6690
6691 fp = fopen("weight_table","w");
6692
6693
6694 fprintf(fp," len1 = %d, len2 = %d\n\n",seqlen[0], seqlen[1] );
6695 fprintf(fp," \n %s \n\n", input_line );
6696 for( l = 1 ; l <= max_dia ; l++ )
6697 for( s = 0 ; s <= l * max_sim_score ; s++ )
6698 fprintf(fp," %d %d %7.8f \n", l, s, wgt[l][s] );
6699
6700 fclose(fp);
6701
6702 return;
6703 }
6704 #endif
6705
6706
6707
6708
6709
6710 /* @funcstatic edialign_ali_arrange ****************************************
6711 **
6712 ** edialign_ali_arrange
6713 **
6714 ** @param [r] ifragno [ajint] Undocumented
6715 ** @param [u] d [struct multi_frag*] Undocumented
6716 ** @param [u] fp [FILE*] Undocumented
6717 ** @param [u] seqout [AjPSeqout] Undocumented
6718 ** @param [u] fp3 [FILE*] Undocumented
6719 ** @param [u] fp4 [FILE*] Undocumented
6720 ** @param [u] fp_col_score [FILE*] Undocumented
6721 ** @param [r] isprot [AjBool] Undocumented
6722 ** @return [void]
6723 *****************************************************************************/
6724
edialign_ali_arrange(ajint ifragno,struct multi_frag * d,FILE * fp,AjPSeqout seqout,FILE * fp3,FILE * fp4,FILE * fp_col_score,AjBool isprot)6725 static void edialign_ali_arrange(ajint ifragno , struct multi_frag *d,
6726 FILE *fp, AjPSeqout seqout, FILE *fp3 ,
6727 FILE *fp4 ,
6728 FILE *fp_col_score, AjBool isprot)
6729 {
6730 ajint block_no;
6731 ajuint char_no ;
6732 ajint shift_cond;
6733 ajuint endlen;
6734 ajuint hv;
6735 ajuint i, j, p;
6736 ajint ii;
6737 ajint pn, k, l, lc;
6738 ajuint max_p;
6739 ajuint sv, s1, s2;
6740 ajint b1, b2, e, dif, lv, add, msf_lines;
6741 AjPSeq eseq = NULL;
6742
6743 char sim_char;
6744 float weak_wgt_type_thr = WEAK_WGT_TYPE_THR ;
6745 float strong_wgt_type_thr = STRONG_WGT_TYPE_THR ;
6746 float frac_plus, frac_minus, frac_nuc, f_inv ;
6747
6748 char **endseq = NULL;
6749 char **hseq = NULL;
6750 char *clear_seq = NULL;
6751 float *weight_count = NULL;
6752 ajint *plus_count = NULL;
6753 ajint *minus_count = NULL;
6754 ajint *nuc_count = NULL;
6755 ajint *frg_involved = NULL;
6756 float *plot = NULL; /* plot[i] = sum of weights of fragments involved at
6757 position i normalized such that the maximum value */
6758
6759 char gap_char = '-';
6760 char ambi_char = ' ';
6761 ajint *begin = NULL, *end = NULL, *b_len = NULL, *first_pos = NULL;
6762 ajint pl_int ;
6763 ajint b_size; /* size of fragments */
6764 struct multi_frag *fragments = NULL;
6765 struct multi_frag *dia = NULL;
6766 ajint **inv_shift = NULL;
6767 ajint char_per_line; /* number of residues per line in output file */
6768 char aligned;
6769 ajuint fragno = ifragno;
6770
6771 char_per_line = ( ( PAPER_WIDTH - 18 ) / 11) * 10;
6772
6773 dia = d;
6774
6775 if((endseq = (char **) calloc( seqnum , sizeof(char *))) == NULL)
6776 {
6777 printf(" problems with memory allocation for `endseq' ! \n \n");
6778 embExitBad();
6779 }
6780
6781 if((hseq = (char **) calloc(seqnum , sizeof(char *))) == NULL)
6782 {
6783 printf(" problems with memory allocation for `hseq' ! \n \n");
6784 embExitBad();
6785 }
6786
6787 if((begin = (int *) calloc( seqnum , sizeof(int))) == NULL)
6788 {
6789 printf(" problems with memory allocation for `begin' ! \n \n");
6790 embExitBad();
6791 }
6792
6793 if((end = (int *) calloc(seqnum , sizeof(int))) == NULL)
6794 {
6795 printf(" problems with memory allocation for `end' ! \n \n");
6796 embExitBad();
6797 }
6798
6799 if((b_len = (int *) calloc(seqnum , sizeof(int))) == NULL)
6800 {
6801 printf(" problems with memory allocation for `b_len' ! \n \n");
6802 embExitBad();
6803 }
6804
6805 if((first_pos = (int *) calloc(seqnum , sizeof(int))) == NULL)
6806 {
6807 printf(" problems with memory allocation for `first_pos' ! \n \n");
6808 embExitBad();
6809 }
6810
6811 if((shift = (int **) calloc(seqnum , sizeof(int *))) == NULL)
6812 {
6813 printf("not enough memory available for `shift' !!!!\n");
6814 fprintf(fp,"not enough memory available for `shift' !\n");
6815 embExitBad();
6816 }
6817
6818 for(hv=0 ; hv<seqnum ; hv++)
6819 if((shift[hv] = (int *) calloc((seqlen[hv]+2),sizeof(int))) == NULL)
6820 {
6821 printf("not enough memory available for `shift' !!!!\n");
6822 fprintf(fp,"not enough memory available for `shift' !\n");
6823 embExitBad();
6824 }
6825
6826
6827 if( ifragno >= 0 )
6828 {
6829
6830 for(hv=0;hv<seqnum;hv++)
6831 {
6832 begin[hv] = seqlen[hv];
6833 end[hv] = 1;
6834 }
6835
6836
6837 if( fragno > 0 )
6838 if((fragments = calloc(fragno,sizeof(struct multi_frag))) == NULL)
6839 {
6840 printf("not enough memory available for fragments!\n");
6841 fprintf(fp,"not enough memory available for fragments!\n");
6842 embExitBad();
6843 }
6844
6845 for( hv = 1 ; hv <= fragno ; hv++)
6846 {
6847 fragments[hv-1] = *dia;
6848 dia = dia->next;
6849 }
6850
6851 for( hv = 0 ; hv < fragno ; hv++ )
6852 for( j = 0 ; j < 2 ; j++ )
6853 {
6854 edialign_mini( &begin[ fragments[hv].s[j] ] ,
6855 fragments[hv].b[j] );
6856 edialign_maxi( &end[ fragments[hv].s[j] ] ,
6857 fragments[hv].b[j] +
6858 fragments[hv].ext );
6859 }
6860
6861 for(hv=0;hv<seqnum;hv++)
6862 {
6863 begin[hv] = 1;
6864 end[hv] = seqlen[hv]+1;
6865 }
6866
6867 b_size = 0;
6868
6869 for(i=0;i<seqnum;i++)
6870 {
6871 b_len[i] = end[i] - begin[i];
6872 edialign_maxi(&b_size,b_len[i]);
6873 }
6874
6875 for(i=0;i<seqnum;i++)
6876 for(hv=0;hv<(ajuint) b_len[i];hv++)
6877 shift[i][ begin[i]+hv ] = hv;
6878
6879 shift_cond = 1;
6880
6881 while(shift_cond)
6882 {
6883 shift_cond = 0;
6884
6885 for( hv = 0 ; hv < fragno ; hv++ )
6886 for(j=0;j<2;j++)
6887 {
6888 k = (j+1)%2;
6889 s1 = fragments[hv].s[j];
6890 s2 = fragments[hv].s[k];
6891 b1 = fragments[hv].b[j];
6892 b2 = fragments[hv].b[k];
6893 e = fragments[hv].ext;
6894
6895 for(l = e-1;l>=0;l--)
6896 {
6897 dif = shift[s2][b2+l] - shift[s1][b1+l];
6898 if (dif > 0 )
6899 {
6900 edialign_new_shift(s1,b1+l,dif);
6901 shift_cond = 1;
6902 }
6903 }
6904 }
6905 } /* while (shift_cond) */
6906
6907
6908
6909
6910
6911
6912
6913 endlen = 0;
6914
6915 for(hv=0;hv<seqnum;hv++)
6916 edialign_maxu(&endlen,shift[hv][ end[hv]-1 ] + 1);
6917
6918 for(hv=0;hv<seqnum;hv++)
6919 if( (endseq[hv] = calloc(endlen+1, sizeof(char) )) == NULL )
6920 {
6921 printf(" not enough memory available for printing results!\n");
6922 fprintf(fp," not enough memory available");
6923 fprintf(fp," for printing results!\n");
6924 embExitBad();
6925 }
6926
6927
6928 if( (inv_shift = (int **) calloc( seqnum , sizeof(int *) )) == NULL )
6929 {
6930 printf("not enough memory available for `inv_shift' !!!!\n");
6931 fprintf(fp,"not enough memory available for `inv_shift' !\n");
6932 embExitBad();
6933 }
6934
6935 for(hv=0 ; hv<seqnum ; hv++)
6936 if( (inv_shift[hv] = (int *) calloc( (endlen+2) , sizeof(int) ))
6937 == NULL )
6938 {
6939 printf("not enough memory available for `inv_shift' !!!!\n");
6940 fprintf(fp,"not enough memory available for `inv_shift' !\n");
6941 embExitBad();
6942 }
6943
6944 if( (clear_seq = (char *) calloc( (endlen+1) , sizeof(char) )) == NULL)
6945 {
6946 printf(" problems with memory allocation for `clear_seq' ! "
6947 "\n \n");
6948 embExitBad();
6949 }
6950
6951 if( (weight_count =
6952 (float *) calloc( ( endlen + 2 ) , sizeof(float) )) == NULL)
6953 {
6954 printf(" problems with memory allocation for `weight_count' "
6955 "!\n \n");
6956 embExitBad();
6957 }
6958
6959 if( (plot = (float *) calloc( ( endlen + 2 ) ,sizeof(float) )) == NULL)
6960 {
6961 printf(" problems with memory allocation for `plot' ! \n \n");
6962 embExitBad();
6963 }
6964
6965 if( (plus_count =
6966 (int *) calloc( ( endlen + 2 ) , sizeof( int ) )) == NULL)
6967 {
6968 printf(" problems with memory allocation for `plus_count' !\n \n");
6969 embExitBad();
6970 }
6971
6972 if( (minus_count =
6973 (int *) calloc( ( endlen + 2 ) , sizeof( int ) )) == NULL)
6974 {
6975 printf(" problems with memory allocation for `minus_count' "
6976 "!\n \n");
6977 embExitBad();
6978 }
6979
6980 if( (nuc_count =
6981 (int *) calloc( ( endlen + 2 ) , sizeof( int ) )) == NULL)
6982 {
6983 printf(" problems with memory allocation for `nuc_count' !\n \n");
6984 embExitBad();
6985 }
6986
6987 if( (frg_involved =
6988 (int *) calloc( ( endlen + 2 ) , sizeof( int ) )) == NULL)
6989 {
6990 printf(" problems with memory allocation for `frg_involved ' "
6991 "!\n \n");
6992 embExitBad();
6993 }
6994
6995
6996
6997 for(hv=0 ; hv<seqnum ; hv++)
6998 for(ii=1 ; ii <= seqlen[hv] ; ii++)
6999 inv_shift[hv][ shift[hv][ii] ] = ii;
7000
7001 for(hv=0;hv<seqnum;hv++)
7002 if( (hseq[hv] = calloc( (maxlen+1), sizeof(char) )) == NULL )
7003 {
7004 printf("not enough memory available for printing results! \n");
7005 fprintf(fp,"not enough memory available");
7006 fprintf(fp," for printing results! \n");
7007 embExitBad();
7008 }
7009 /*
7010 printf("endlen = %d \n\n", endlen);
7011 */
7012
7013 for(hv=0;hv<seqnum;hv++)
7014 for(i=0;i<endlen;i++)
7015 endseq[hv][i] = gap_char;
7016
7017 for(hv=0;hv<seqnum;hv++)
7018 for(i=begin[hv];i<(ajuint) end[hv];i++)
7019 hseq[hv][i] = tolower((int)seq[hv][i]);
7020
7021 for( hv = 0 ; hv < fragno ; hv++ )
7022 for(k=0;k<2;k++)
7023 for(ii = fragments[hv].b[k] ; ii < fragments[hv].b[k] +
7024 fragments[hv].ext ; ii++)
7025 hseq[ fragments[hv].s[k]][ii] = seq[fragments[hv].s[k]][ii];
7026
7027 for(hv=0;hv<seqnum;hv++)
7028 for(ii = begin[hv] ; ii < end[hv] ; ii++)
7029 endseq[hv][ shift[hv][ii] ] = hseq[hv][ii];
7030
7031 for(i=0;i<endlen;i++)
7032 clear_seq[i] = ' ';
7033
7034
7035
7036
7037
7038
7039 for(p=0;p<endlen;p++)
7040 {
7041 s1 = 0;
7042 while(
7043 ( endseq[s1][p] == tolower((int) endseq[s1][p] ) )
7044 && (s1 < (seqnum - 1) ) /* no capital letter */
7045 )
7046 s1++;
7047
7048 if(s1 < (seqnum - 1) )
7049 {
7050 for(s2 = s1+1 ; s2 < seqnum ; s2++)
7051 {
7052 if( endseq[s2][p] != tolower((int) endseq[s2][p] ) )
7053 /* endseq[s2][p] capital letter */
7054 {
7055 aligned = edialign_alignedPositions(gabiosclos,s1,
7056 inv_shift[s1][p],
7057 s2,
7058 edialign_succFrontier(gabiosclos,
7059 s1,
7060 inv_shift[s1][p],
7061 s2));
7062
7063 if (!aligned)
7064 /* i.e.endseq[s1][p] not aligned with end
7065 seq[s2][p]*/
7066 clear_seq[p] =ambi_char;
7067 }
7068 }
7069 }
7070 }
7071
7072
7073 if( mask )
7074 for(sv = 0 ; sv < seqnum ; sv++)
7075 for(hv = 0 ; hv < endlen ; hv++ )
7076 if( endseq[sv][hv] != gap_char )
7077 if( endseq[sv][hv] == tolower((int) endseq[sv][hv] ) )
7078 endseq[sv][hv] = '*' ;
7079
7080
7081 if( col_score )
7082 {
7083 fprintf(fp_col_score , "# 1 %u \n" , endlen );
7084 fprintf(fp_col_score,"# %s \n", upg_str);
7085 }
7086
7087 edialign_plot_calc( num_all_it_dia , endlen , weight_count , plot ,
7088 all_it_dia , fp_col_score);
7089
7090 edialign_wgt_type_count( num_all_it_dia , endlen , plus_count,
7091 minus_count, nuc_count , frg_involved,
7092 all_it_dia );
7093
7094
7095 lc = (endlen-1)/char_per_line;
7096 for(hv=0;hv<seqnum;hv++)
7097 first_pos[hv] = begin[hv] ;
7098
7099
7100 for( k = 0 ; k <= lc ; k++ )
7101 {
7102 for( hv = 0 ; hv < seqnum ; hv++ )
7103 {
7104 fprintf(fp, "%s", seq_name[hv] );
7105
7106 fprintf(fp,"%8d ", first_pos[hv]);
7107
7108
7109 for(i=0;i<edialign_minu2(char_per_line,endlen-k*char_per_line);
7110 i++)
7111 {
7112 if(!(i%10))fprintf(fp, " ");
7113 fprintf(fp, "%c",endseq[hv][k*char_per_line+i]);
7114 if(endseq[hv][k*char_per_line+i] != gap_char)
7115 first_pos[hv]++;
7116 }
7117 fprintf(fp, " \n");
7118 }
7119
7120 fprintf(fp," ");
7121 for( i = 0 ; i < edialign_minu2(char_per_line , endlen-k*
7122 char_per_line )
7123 ; i++ )
7124 {
7125 if(!(i%10))fprintf(fp, " ");
7126 fprintf(fp, "%c",clear_seq[k*char_per_line+i]);
7127 }
7128
7129 if( plot_num )
7130 fprintf(fp, " \n");
7131
7132
7133
7134 if( quali_num == 0 )
7135 for( pn = 0 ; pn < plot_num ; pn ++ )
7136 {
7137 fprintf(fp," ");
7138 for(i=0;i<edialign_minu2(char_per_line,endlen-k*
7139 char_per_line);i++)
7140 {
7141 if( !(i%10) )fprintf(fp, " ");
7142 if( plot[ k*char_per_line + i ] > pn )
7143 fprintf(fp, "*");
7144 else
7145 fprintf(fp, " ");
7146 }
7147 fprintf(fp, " \n");
7148
7149 if( plot_num == 1 )
7150 fprintf(fp, " \n");
7151 }
7152
7153
7154 if( quali_num )
7155 {
7156 for( i = 0 ; i < SEQ_NAME_LEN ; i++ )
7157 {
7158 fprintf(fp," ");
7159 }
7160
7161 fprintf(fp," ");
7162 for( i = 0 ; i < edialign_minu2(char_per_line,endlen-k*
7163 char_per_line);
7164 i++ )
7165 {
7166 if( !(i%10) )fprintf(fp, " ");
7167 pl_int = (ajint) (9 * plot[ k * char_per_line + i ] /
7168 plot_num);
7169 fprintf(fp, "%d", pl_int );
7170 }
7171 fprintf(fp, " \n");
7172 }
7173
7174 /***********************************************************************
7175
7176 fprintf(fp, " \n");
7177 if( wgt_type > 1 )
7178 {
7179 for( i = 0 ; i < SEQ_NAME_LEN ; i++ )
7180 {
7181 fprintf(fp," ");
7182 }
7183
7184 fprintf(fp," plus ");
7185 for( i = 0 ; i < edialign_mini2( char_per_line , endlen-k*
7186 char_per_line );
7187 i++ ) {
7188 if( !(i%10) )fprintf(fp, " ");
7189 fprintf(fp, "%d", plus_count[ k * char_per_line + i ] );
7190 }
7191 fprintf(fp, " \n");
7192 }
7193
7194 if( wgt_type > 1 ) {
7195 for( i = 0 ; i < SEQ_NAME_LEN ; i++ ) {
7196 fprintf(fp," ");
7197 }
7198
7199 fprintf(fp," minus ");
7200 for( i = 0 ; i < edialign_mini2( char_per_line ,endlen-k*
7201 char_per_line);
7202 i++ ) {
7203 if( !(i%10) )fprintf(fp, " ");
7204 fprintf(fp, "%d", minus_count[ k * char_per_line + i ] );
7205 }
7206 fprintf(fp, " \n");
7207 }
7208
7209 if( wgt_type > 1 ) {
7210 for( i = 0 ; i < SEQ_NAME_LEN ; i++ ) {
7211 fprintf(fp," ");
7212 }
7213
7214 fprintf(fp," nuc ");
7215 for( i = 0 ; i < edialign_mini2( char_per_line , endlen-k*
7216 char_per_line );
7217 i++ ) {
7218 if( !(i%10) )fprintf(fp, " ");
7219 fprintf(fp, "%d", nuc_count[ k * char_per_line + i ] );
7220 }
7221 fprintf(fp, " \n");
7222 fprintf(fp, " \n");
7223 }
7224
7225 ************************************************************************/
7226
7227 if( wgt_type_plot )
7228 if( wgt_type == 3 )
7229 {
7230
7231 fprintf(fp,"sim. level");
7232
7233 for( i = 0 ; i < SEQ_NAME_LEN ; i++ )
7234 {
7235 fprintf(fp," ");
7236 }
7237
7238 for(i=0; i < edialign_minu2(char_per_line,endlen-k*
7239 char_per_line);
7240 i++ )
7241 {
7242 if( !(i%10) )fprintf(fp, " ");
7243 sim_char = '.' ;
7244
7245 if( frg_involved[ k * char_per_line + i ] ) {
7246
7247 f_inv = (float) frg_involved[ k *
7248 char_per_line + i] ;
7249 frac_plus = plus_count[ k * char_per_line + i ] /
7250 f_inv ;
7251 frac_minus = minus_count[k * char_per_line + i] /
7252 f_inv ;
7253 frac_nuc = nuc_count[ k * char_per_line + i ] /
7254 f_inv ;
7255
7256 if ( frac_plus > weak_wgt_type_thr )
7257 {
7258 if( crick_strand )
7259 sim_char = 'f' ;
7260 else
7261 sim_char = 'p' ;
7262 }
7263 if ( frac_plus > strong_wgt_type_thr )
7264 {
7265 if( crick_strand )
7266 sim_char = 'F' ;
7267 else
7268 sim_char = 'P' ;
7269 }
7270 if ( frac_minus > weak_wgt_type_thr )
7271 sim_char = 'r' ;
7272 if ( frac_minus > strong_wgt_type_thr )
7273 sim_char = 'R' ;
7274
7275 if ( frac_nuc > weak_wgt_type_thr )
7276 sim_char = 'n' ;
7277 if ( frac_nuc > strong_wgt_type_thr )
7278 sim_char = 'N' ;
7279
7280 }
7281 fprintf(fp, "%c", sim_char );
7282 }
7283 fprintf(fp, " \n");
7284 fprintf(fp, " \n");
7285 }
7286
7287
7288
7289
7290
7291 fprintf(fp, " \n");
7292
7293 } /* for(k=0;k<=lc;k++) */
7294
7295
7296 if( fasta_file )
7297 {
7298
7299 for(sv=0;sv<seqnum;++sv)
7300 {
7301 eseq = ajSeqNewRes(endlen+1);
7302 ajSeqAssignNameC(eseq,seq_name[sv]);
7303 ajSeqAssignSeqC(eseq,endseq[sv]);
7304 if(isprot)
7305 ajSeqSetProt(eseq);
7306 else
7307 ajSeqSetNuc(eseq);
7308 ajSeqoutWriteSeq(seqout,eseq);
7309 ajSeqDel(&eseq);
7310 }
7311
7312 /*
7313 for(sv = 0 ; sv < seqnum ; sv++ )
7314 {
7315 fprintf(fp2,">%s", full_name[sv]);
7316 for(i = 0 ; i < endlen ; i++)
7317 {
7318 if( ! ( i % 50 ) )
7319 fprintf(fp2,"\n");
7320 fprintf(fp2,"%c", endseq[sv][i]);
7321 }
7322
7323 fprintf(fp2,"\n ");
7324 if( sv < ( seqnum - 1 ) )
7325 fprintf(fp2,"\n");
7326 }
7327 */
7328 ajSeqoutClose(seqout);
7329 }
7330
7331
7332 if( cw_file )
7333 {
7334 block_no = 0;
7335
7336 fprintf(fp4,"DIALIGN 2.1 multiple sequence alignment \n\n");
7337 fprintf(fp4,"// \n\n\n");
7338
7339 while( block_no * 60 < (ajint) endlen )
7340 {
7341 char_no = edialign_minu2( 60 , ( endlen - block_no * 60 ) ) ;
7342 for( sv = 0 ; sv < seqnum ; sv++ )
7343 {
7344 fprintf(fp4,"%s ", seq_name[sv] );
7345 for( i = 0 ; i < char_no ; i++)
7346 fprintf(fp4,"%c", endseq[sv][ block_no * 60 + i ] );
7347 fprintf(fp4,"\n");
7348 }
7349 fprintf(fp4,"\n\n");
7350 block_no++;
7351 }
7352
7353
7354 }
7355
7356
7357 if( msf_file )
7358 {
7359 msf_lines = endlen / 50;
7360 if(endlen % 50)
7361 msf_lines = msf_lines + 1;
7362
7363
7364 fprintf(fp3,"DIALIGN 2\n\n\n");
7365 fprintf(fp3," MSF: %u \n\n", endlen);
7366
7367 for( sv = 0 ; sv < seqnum ; sv++ )
7368 fprintf(fp3," Name: %s Len: %d \n", seq_name[sv],
7369 seqlen[sv] );
7370 fprintf(fp3,"\n// \n\n");
7371
7372 for(lv = 0 ; lv < msf_lines ; lv++ )
7373 {
7374 add = lv * 50;
7375 max_p = edialign_mini2( endlen - add , 50 );
7376
7377 for( sv = 0 ; sv < seqnum ; sv++ )
7378 {
7379 fprintf(fp3, "%s", seq_name[sv] );
7380 for(i=0 ; i < 4 ; i++ )
7381 fprintf(fp3, " ");
7382
7383 for(i = 0 ; i < max_p ; i++)
7384 {
7385 if( !(i%10) )fprintf(fp3, " ");
7386 if( endseq[sv][add + i] == '-' )
7387 fprintf(fp3,".");
7388 else
7389 fprintf(fp3,"%c", endseq[sv][add + i]);
7390 }
7391 fprintf(fp3,"\n");
7392 }
7393 fprintf(fp3,"\n\n");
7394 }
7395
7396 }
7397
7398
7399 if( ( seqnum > 2 ) && ( ref_seq == 0 ) )
7400 {
7401 fprintf(fp,"\n \n \n Sequence tree:\n");
7402 fprintf(fp," ==============\n\n");
7403
7404 if( ! strcmp( clust_sim , "av" ) )
7405 fprintf(fp,"Tree constructed using UPGMA ");
7406 fprintf(fp,"based on DIALIGN fragment weight scores");
7407
7408 if( ! strcmp( clust_sim , "max" ) )
7409 fprintf(fp,"Tree constructed using maximum linkage "
7410 "clustering");
7411
7412
7413 if( ! strcmp( clust_sim , "min" ) )
7414 fprintf(fp,"Tree constructed using minimum linkage "
7415 "clustering");
7416
7417
7418 fprintf(fp,"\n \n%s", upg_str);
7419 }
7420
7421 fprintf(fp,"\n \n \n");
7422
7423 for(hv=0;hv<seqnum;hv++)
7424 AJFREE(hseq[hv]);
7425
7426 for(hv=0;hv<seqnum;hv++)
7427 AJFREE(endseq[hv]);
7428
7429 if( fragno > 0 )
7430 AJFREE( fragments );
7431
7432 AJFREE(plot);
7433
7434 AJFREE(weight_count);
7435
7436
7437 } /* for(bc=0;bc<1;bc++) */
7438
7439
7440 for(hv=0;hv<seqnum;hv++)
7441 {
7442 free(shift[hv]);
7443 free(inv_shift[hv]);
7444 }
7445
7446 AJFREE(endseq);
7447 AJFREE (hseq);
7448 AJFREE(begin);
7449 AJFREE(end);
7450 AJFREE(b_len);
7451 AJFREE(first_pos);
7452
7453 AJFREE(inv_shift);
7454 AJFREE(shift);
7455 AJFREE(frg_involved);
7456 AJFREE(nuc_count);
7457 AJFREE(minus_count);
7458 AJFREE(plus_count);
7459 AJFREE(weight_count);
7460 AJFREE(plot);
7461 AJFREE(clear_seq);
7462
7463
7464 return;
7465 }
7466
7467
7468
7469
7470 /* @funcstatic edialign_para_print ****************************************
7471 **
7472 ** edialign_para_print
7473 **
7474 ** @param [u] s_f [char*] Undocumented
7475 ** @param [u] fpi [FILE*] Undocumented
7476 ** @return [void]
7477 *****************************************************************************/
7478
edialign_para_print(char * s_f,FILE * fpi)7479 static void edialign_para_print( char *s_f, FILE *fpi )
7480 {
7481 ajuint hv;
7482
7483 (void) s_f; /* make it used */
7484
7485 {
7486 if(cd_gobics)
7487 {
7488 fprintf(fpi," \n CHAOS / DIALIGN \n");
7489 fprintf(fpi," ***************\n \n");
7490
7491 if( BETA )
7492 fprintf(fpi," beta version\n\n");
7493
7494 fprintf(fpi," Program code written by \n");
7495 fprintf(fpi," Burkhard Morgenstern, Said Abdeddaim and "
7496 "Michael Brudno \n\n");
7497 fprintf(fpi," e-mail contact: ");
7498 fprintf(fpi,"dialign (at) gobics (dot) de \n \n");
7499 fprintf(fpi," Published research assisted");
7500 fprintf(fpi," by CHAOS / DIALIGN should cite: \n \n");
7501 fprintf(fpi," Michael Brudno et al.");
7502 fprintf(fpi," (2003)\n");
7503 fprintf(fpi," \"Fast and sensitive multiple "
7504 "alignment");
7505 fprintf(fpi," of large genomic sequences\" \n");
7506 fprintf(fpi," BMC Bioinformatics 4:66 \n");
7507 fprintf(fpi," http://www.biomedcentral.com/1471-"
7508 "2105/4/66 \n\n");
7509 }
7510 else
7511 {
7512 fprintf(fpi," \n DIALIGN 2.2.1 \n");
7513 fprintf(fpi," *************\n \n");
7514
7515 if( BETA )
7516 fprintf(fpi," beta version\n\n");
7517
7518 fprintf(fpi," Program code written by Burkhard");
7519 fprintf(fpi," Morgenstern and Said Abdeddaim \n");
7520 fprintf(fpi," e-mail contact: ");
7521 fprintf(fpi,"dialign (at) gobics (dot) de \n \n");
7522 fprintf(fpi," Published research assisted");
7523 fprintf(fpi," by DIALIGN 2 should cite: \n \n");
7524 fprintf(fpi," Burkhard Morgenstern");
7525 fprintf(fpi," (1999).\n");
7526
7527 fprintf(fpi," DIALIGN 2: improvement of the");
7528 fprintf(fpi," segment-to-segment\n approach");
7529 fprintf(fpi," to multiple sequence alignment.\n");
7530 fprintf(fpi," Bioinformatics 15,");
7531 fprintf(fpi," 211 - 218. \n\n");
7532 }
7533
7534 fprintf(fpi," For more information, please visit");
7535 fprintf(fpi," the DIALIGN home page at \n\n ");
7536 fprintf(fpi,"http://bibiserv.techfak.uni-bielefeld.de/dialign/");
7537 fprintf(fpi," \n \n");
7538
7539 fprintf(fpi," ***************************************"
7540 "*********************\n \n");
7541 }
7542
7543 if( online )
7544 {
7545 fprintf(fpi,"\n\n The following options have been used: \n\n") ;
7546 fprintf(fpi," - sequences are");
7547 if( wgt_type == 0 )
7548 fprintf(fpi," protein sequences \n");
7549 if( wgt_type == 1 )
7550 fprintf(fpi," nucleic acid sequences without translation "
7551 "option\n");
7552 if( wgt_type == 2 )
7553 fprintf(fpi," nucleic acid sequences with translation option\n");
7554 if( speed_optimized )
7555 fprintf(fpi," - speed optimized,");
7556 fprintf(fpi," see user guide for details \n");
7557 if( anchors )
7558 fprintf(fpi," - anchor points used\n" );
7559 fprintf(fpi,"\n");
7560 }
7561 else
7562 fprintf(fpi,"\n\n %s \n\n", input_line );
7563
7564 fprintf(fpi," \n");
7565
7566 fprintf(fpi," Aligned sequences: length:\n");
7567 fprintf(fpi," ================== =======\n \n");
7568
7569 for(hv=0;hv<seqnum;hv++)
7570 {
7571 fprintf(fpi, " %3u) ", hv + 1 );
7572 fprintf(fpi, "%s", seq_name[hv] );
7573 fprintf(fpi, " %9d\n",seqlen[hv]);
7574 }
7575
7576
7577
7578 fprintf(fpi, "\n Average seq. length:" );
7579 fprintf(fpi, " %9.1f \n", av_len );
7580
7581 fprintf(fpi,"\n\n Please note that only upper-case letters are");
7582 fprintf(fpi," considered to be aligned. \n");
7583
7584 fprintf(fpi,"\n\n Alignment (DIALIGN format):\n");
7585 fprintf(fpi," ===========================\n \n");
7586
7587 return;
7588 }
7589
7590
7591
7592
7593 #if 0
/* @funcstatic edialign_para_read ****************************************
**
** Legacy DIALIGN command-line parser. It is compiled out by the
** surrounding "#if 0".
**
** Walks arg[1] .. arg[num-2]. Every argument must be one of the option
** strings listed in the first "if"; anything else prints an error and
** calls embExitBad(). Recognised options set the corresponding
** file-level option variables. Options that take values (-fn, -istep,
** -lmax, -stars, -smin, -thr, -mat_thr, -mot) consume the following
** argument(s) by advancing the index in place.
**
** NOTE(review): "-red" and "-strict" have handlers below but are missing
** from the list of accepted options, so they would be rejected as
** unknown before their handlers are reached.
** NOTE(review): "-d1w" is accepted by the list but has no handler.
** NOTE(review): the "-mot" error path calls regex_format_complain(),
** which is not a name defined in the visible part of this file (the
** helper here is edialign_regex_format_complain).
** NOTE(review): because the index is advanced in place, the value just
** consumed by one option is itself compared against the option names
** tested later in the same loop iteration.
**
** @param [r] num [int] Number of entries in arg
** @param [u] arg [char**] Argument strings; presumably argv - confirm
** @return [void]
*****************************************************************************/

static void edialign_para_read( int num , char ** arg )
{
    ajint an = 1;


    while( an < num - 1 )
    {

        /* strcmp() is non-zero for a mismatch, so the condition is true
           only when arg[an] matches none of the known options */
        if(
            strcmp( arg[an] , "-afc") && /* create file containing ALL
                                            fragments considered for
                                            alignment */
            strcmp( arg[an] , "-afc_v") && /* like -afc with fragments
                                              explicitly printed */
            strcmp( arg[an] , "-b1") && /* break */
            strcmp( arg[an] , "-b2") && /* break */
            strcmp( arg[an] , "-bs") && /* bubble sort */
            strcmp( arg[an] , "-csc") && /* column score output */
            strcmp( arg[an] , "-cs") && /* crick strand */
            strcmp( arg[an] , "-cw") && /* additional output file
                                           in clustalw format */
            strcmp( arg[an] , "-d1w") && /* old weight function */

            strcmp( arg[an] , "-ds") &&
            strcmp( arg[an] , "-fa") && /* separate file with
                                           alignment in fasta format */
            strcmp( arg[an] , "-ff") && /* fragment file */
            strcmp( arg[an] , "-fn") && /* name of output file */
            strcmp( arg[an] , "-fop") && /* create file containing
                                            fragments selected for
                                            optimal pairwise alignment */
            strcmp( arg[an] , "-fsm") && /* create file containing
                                            consistent fragments in
                                            multiple alignment (in
                                            format needed for -xfr ) */
            strcmp( arg[an] , "-fsmv") && /* same as -fsm but verbose */
            strcmp( arg[an] , "-cd_gobics") && /* chaos + dialign @ gobics */
            strcmp( arg[an] , "-lgs_t") && /* genomic sequences, transl. */
            strcmp( arg[an] , "-istep") && /* max iteration steps */
            strcmp( arg[an] , "-it") && /* iteration */
            strcmp( arg[an] , "-iw") && /* ind. weights */
            strcmp( arg[an] , "-lgs") && /* genomic sequences */
            strcmp( arg[an] , "-lgsx") && /* genomic sequences, accurate +
                                             textual alignment */
            strcmp( arg[an] , "-lmax") && /* max. length of diag. */
            strcmp( arg[an] , "-lo") && /* long output */
            strcmp( arg[an] , "-ma") && /* mixed weights */
            strcmp( arg[an] , "-anc") && /* anchor regions */
            strcmp( arg[an] , "-mask") &&
            strcmp( arg[an] , "-mat") && /* calc. subst. freq. matrix */
            strcmp( arg[an] , "-mat_thr") && /* thr for sbst. fr. mat. */
            strcmp( arg[an] , "-max_link") && /* max. linkage clustering */
            strcmp( arg[an] , "-min_link") && /* min. linkage clustering */
            strcmp( arg[an] , "-mot") && /* motifs considered */
            strcmp( arg[an] , "-msf") && /* separate file with
                                            alignment in msf format */
            strcmp( arg[an] , "-n") && /* DNA/RNA sequences */
            strcmp( arg[an] , "-nas") && /* no anchor sorting */
            strcmp( arg[an] , "-nt") && /* DNA/RNA sequences with
                                           translation option */
            strcmp( arg[an] , "-nta") && /* no textual alignment */
            strcmp( arg[an] , "-o") && /* optimized */
            strcmp( arg[an] , "-online") && /* online */
            strcmp( arg[an] , "-ow") && /* overlap weights */
            strcmp( arg[an] , "-pamnd") && /* print av. max. number of frg. */
            strcmp( arg[an] , "-pand") && /* print av. number of diag. */
            strcmp( arg[an] , "-pao") && /* pairw. alignments only */
            strcmp( arg[an] , "-ref_seq") && /* seq_2, ... , seq_n
                                                aligned to seq_1 */
            strcmp( arg[an] , "-stars")&& /* maximum number of stars under
                                             alignment indicating relative
                                             similarity */
            strcmp( arg[an] , "-pst") && /* print status */
            strcmp( arg[an] , "-sc") && /* self comparison */
            strcmp( arg[an] , "-smin") &&
            strcmp( arg[an] , "-stdo") && /* standard output */
            strcmp( arg[an] , "-ta") && /* textual alignment */
            strcmp( arg[an] , "-thr") && /* threshold */
            strcmp( arg[an] , "-ts") && /* time stamps */
            strcmp( arg[an] , "-wgtpr") && /* weight print */
            strcmp( arg[an] , "-wgtprx") && /* weight print */
            strcmp( arg[an] , "-wtp") && /* weight type plot */
            strcmp( arg[an] , "-xfr") /* excluded fragments */

            )
        {
            printf("\n \n Arguments in command line make no sense! \n \n");
            printf("\n Unknown option %s \n \n \n \n", arg[an] );
            embExitBad();
        }

        /* simple flag options */

        if( !strcmp( arg[an] , "-afc") )
            afc_file = 1;

        if( !strcmp( arg[an] , "-afc_v") ) {
            afc_file = 1;
            afc_filex = 1 ;
        }

        if( !strcmp( arg[an] , "-b1") )
            break1 = 1;

        if( !strcmp( arg[an] , "-b2") )
            break2 = 1;

        if( !strcmp( arg[an] , "-bs") )
            bubblesort = 1;

        if( !strcmp( arg[an] , "-csc") )
            col_score = 1;

        if( !strcmp( arg[an] , "-cd_gobics") )
            cd_gobics = 1;

        if( !strcmp( arg[an] , "-cs") )
            crick_strand = 1;

        if( !strcmp( arg[an] , "-cw") )
            cw_file = 1;

        if( !strcmp( arg[an] , "-ds") )
            dna_speed = 1 ;

        if( !strcmp( arg[an] , "-fa") )
            fasta_file = 1;

        if( !strcmp( arg[an] , "-ff") )
            frag_file = 1;

        if( !strcmp( arg[an] , "-fop") )
            dia_pa_file = 1;

        if( !strcmp( arg[an] , "-fsm") )
            frg_mult_file = 1;

        if( !strcmp( arg[an] , "-fsmv") ) {
            frg_mult_file = 1;
            frg_mult_file_v = 1;
        }

        if( !strcmp( arg[an] , "-it") )
            iter_cond_prob = 1;

        if( !strcmp( arg[an] , "-iw") )
            overlap_weights = 0;

        /* option bundles: each sets several variables at once */

        if( !strcmp( arg[an] , "-lgs") ) {
            wgt_type = 3 ;
            /* iter_cond_prob = 1 ;
             */
            threshold = 2.0 ;
            lmax = 30 ;
            thr_sim_score = 8 ;
            strict = 1 ;
            textual_alignment = 0 ;
            /* dia_pa_file = 1; */
            frag_file = 1 ;
            dna_speed = 1 ;
            crick_strand = 1 ;
            lgs_option = 1 ;
            print_status = 1 ;
        }

        if( !strcmp( arg[an] , "-lgs_t") ) {
            wgt_type = 2 ;
            iter_cond_prob = 1 ;
            threshold = 0.0 ;
            lmax = 30 ;
            thr_sim_score = 8 ;
            strict = 1 ;
            textual_alignment = 0 ;
            dia_pa_file = 1;
            frag_file = 1 ;
            dna_speed = 1 ;
            print_status = 1 ;
        }

        if( !strcmp( arg[an] , "-lgsx") ) {
            wgt_type = 3 ;
            iter_cond_prob = 1 ;
            strict = 1 ;
            frag_file = 1 ;
            crick_strand = 1 ;
            lgs_option = 1 ;
            print_status = 1 ;
        }

        if( !strcmp( arg[an] , "-lo") )
            long_output = 1;

        if( !strcmp( arg[an] , "-ma") ) {
            wgt_type = 3;
        }

        if( !strcmp( arg[an] , "-anc") )
            anchors = 1;

        if( !strcmp( arg[an] , "-mask") )
            mask = 1;

        if( !strcmp( arg[an] , "-max_link") )
            strcpy (clust_sim , "max" );

        if( !strcmp( arg[an] , "-min_link") )
            strcpy (clust_sim , "min" );

        if( !strcmp( arg[an] , "-msf") )
            msf_file = 1;

        if( !strcmp( arg[an] , "-n") ) {
            wgt_type = 1;
        }

        if( !strcmp( arg[an] , "-nas") ) {
            nas = 1;
        }

        if( !strcmp( arg[an] , "-nt") )
            wgt_type = 2;

        if( !strcmp( arg[an] , "-nta") )
            textual_alignment = 0;

        if( !strcmp( arg[an] , "-o") )
        {
            speed_optimized = 1 ;
            threshold = 0.5 ;
            lmax = 30 ;
            thr_sim_score = 8 ;
        }

        if( !strcmp( arg[an] , "-ow") )
            ow_force = 1;

        if( !strcmp( arg[an] , "-pao") )
            pa_only = 1;

        if( !strcmp( arg[an] , "-pamnd") )
            pr_av_max_nd = 1;

        if( !strcmp( arg[an] , "-pand") )
            pr_av_nd = 1;

        if( !strcmp( arg[an] , "-pst") )
            print_status = 1;

        /* NOTE(review): "-red" is not in the accepted list above */
        if( !strcmp( arg[an] , "-red") )
            redundant = 1;

        if( !strcmp( arg[an] , "-mat") )
            sf_mat = 1;

        if( !strcmp( arg[an] , "-online") )
            online = 1;

        if( !strcmp( arg[an] , "-ref_seq") )
            ref_seq = 1;

        if( !strcmp( arg[an] , "-sc") )
            self_comparison = 1;

        if( !strcmp( arg[an] , "-stdo") )
            standard_out = 1;

        /* NOTE(review): "-strict" is not in the accepted list above */
        if( !strcmp( arg[an] , "-strict") )
            strict = 1;

        if( !strcmp( arg[an] , "-ta") )
            textual_alignment = 1 ;

        if( !strcmp( arg[an] , "-ts") )
            time_stamps = 1 ;

        if( !strcmp( arg[an] , "-wgtpr") )
            wgt_print = 1 ;

        if( !strcmp( arg[an] , "-wgtprx") )
            wgt_print_x = 1 ;

        if( !strcmp( arg[an] , "-wtp") )
            wgt_type_plot = 1 ;

        if( !strcmp( arg[an] , "-xfr") )
            exclude_frg = 1 ;




        /********************************************************************/

        /* options with values: "an + 2 < num" requires one value after the
           option plus at least one further trailing argument */

        if( !strcmp( arg[an] , "-fn") )
        {
            if( an + 2 < num )
            {
                strcpy( output_name , arg[++an] );
                default_name = 0;
            }
            else
            {
                printf("\n \n Arguments in command line don't make sense! "
                       "\n");
                printf(" (Name of output file not properly specified) "
                       "\n \n");
                embExitBad();
            }
        }




        /********************************************************************/


        if( !strcmp( arg[an] , "-istep") )
        {
            if( ( an + 2 < num ) && edialign_num_test( arg[an + 1] ) )
                max_itnum = atoi( arg[++an] );
            else
            {
                printf("\n \n Arguments in command line don't make "
                       "sense! \n");
                printf(" (max_itnum not properly specified) \n \n");
                embExitBad();
            }
        }



        /********************************************************************/


        if( !strcmp( arg[an] , "-lmax") )
        {
            if( ( an + 2 < num ) && edialign_num_test( arg[an + 1] ) )
                lmax = atoi( arg[++an] );
            else
            {
                printf("\n \n Arguments in command line don't make "
                       "sense! \n");
                printf(" (lmax not properly specified) \n \n");
                embExitBad();
            }
        }



        /********************************************************************/


        if( !strcmp( arg[an] , "-stars") )
        {
            if( ( an + 2 < num ) && edialign_num_test( arg[an + 1] ) )
            {
                plot_num = atoi( arg[++an] );
                quali_num = 0 ;
            }
            else
            {
                printf("\n \n Arguments in command line don't make "
                       "sense! \n");
                printf(" (Number of \"*\" characters not properly "
                       "specified) \n \n");
                embExitBad();
            }
        }



        /********************************************************************/


        if( !strcmp( arg[an] , "-smin") )
        {
            if( (an + 2 < num) && edialign_num_test( arg[an + 1] ) )
                thr_sim_score = atoi( arg[++an] );
            else
            {
                printf("\n \n Arguments in command line don't make "
                       "sense! \n");
                printf(" (Speed not properly specified) \n \n");
                embExitBad();
            }
        }



        /********************************************************************/


        if( !strcmp( arg[an] , "-thr") )
        {
            if( (an + 2 < num) && edialign_num_test( arg[an + 1] ) )
            {
                threshold = atof( arg[++an] );
            }
            else
            {
                printf("\n \n Arguments in command line don't make "
                       "sense! \n");
                printf(" (Threshod not properly specified) \n \n");
                embExitBad();
            }
        }



        /********************************************************************/


        if( !strcmp( arg[an] , "-mat_thr") )
        {
            if( (an + 2 < num) && edialign_num_test( arg[an + 1] ) )
            {
                sf_mat_thr = atof( arg[++an] );
            }
            else
            {
                printf("\n \n Arguments in command line don't "
                       "make sense! \n");
                printf(" (subst. mat. threshod not properly specified) "
                       "\n \n");
                embExitBad();
            }
        }



        /********************************************************************/

        /* -mot <regex> <factor> <offset factor>: three values are consumed;
           only the two numeric ones are validated */

        if( !strcmp( arg[an] , "-mot") )
        {
            if( ( an + 4 < num ) &&
                edialign_num_test( arg[ an + 2 ] ) &&
                edialign_num_test( arg[ an + 3 ] )
                )
            {
                motifs = 1 ;
                strcpy( mot_regex , arg[++an] );
                mot_factor = atof( arg[++an] ) ;
                mot_offset_factor = atof( arg[++an] ) ;
                regex_len = strlen( mot_regex ) ;
            }
            else
                regex_format_complain();
        }


        /********************************************************************/

        an++;
    }

    return;
}
8058 #endif
8059
8060
8061
8062
/* @funcstatic edialign_erreur ****************************************
**
** Reports an error by handing the message, followed by a newline, to
** ajFatal. The message is passed as a "%s" argument, so '%' characters
** in it are not interpreted as format directives.
**
** @param [r] message [const char*] Error text passed on to ajFatal
** @return [void]
*****************************************************************************/

static void edialign_erreur(const char *message)
{
    ajFatal("%s\n", message);
}
8075
8076
8077
8078
/* @funcstatic edialign_allouer ****************************************
**
** malloc wrapper. A request for zero bytes is turned into a request
** for one byte. If malloc fails, edialign_erreur("out of memory") is
** called; the (NULL) result is returned should that call come back.
**
** The memory is not initialised.
**
** @param [r] taille [size_t] Number of bytes requested
** @return [void*] Pointer obtained from malloc
*****************************************************************************/

static void* edialign_allouer(size_t taille)
{
    void *bloc;

    /* zero-byte requests are rounded up to a single byte */
    bloc = (void *) malloc((taille != 0) ? taille : 1);

    if (!bloc)
        edialign_erreur("out of memory");

    return bloc;
}
8101
8102
8103
8104
/* @funcstatic edialign_reallouer ****************************************
**
** realloc wrapper. If realloc fails, edialign_erreur("out of memory")
** is called.
**
** A request for zero bytes is turned into a request for one byte, as in
** edialign_allouer. realloc(p, 0) is allowed to release the block and
** return NULL, which would otherwise be reported here as a spurious
** "out of memory" error.
**
** @param [u] pointeur [void*] Block to resize (may be NULL)
** @param [r] taille [size_t] New size in bytes
** @return [void*] Pointer to the resized block
*****************************************************************************/

static void* edialign_reallouer(void *pointeur, size_t taille)
{
    void *nouveau;

    /* keep the request non-empty so that NULL always means failure */
    if (taille == 0)
        taille = 1;

    nouveau = realloc(pointeur, taille);

    if (nouveau == NULL)
        edialign_erreur("out of memory");

    return nouveau;
}
8124
8125
8126
8127
/* @funcstatic edialign_liberer ****************************************
**
** Releases a block by passing it to free().
**
** @param [d] pointeur [void*] Block to release
** @return [void]
*****************************************************************************/

static void edialign_liberer(void *pointeur)
{
    free(pointeur);
}
8143
8144
8145
8146
/* @funcstatic edialign_callouer_mat ****************************************
**
** Allocates a matrix as an array of nb_lig row pointers, each row being
** a separately allocated block of nb_col * t_elt bytes. All allocations
** go through edialign_allouer.
**
** NOTE(review): rows come from edialign_allouer (malloc), so their
** contents are not zero-initialised.
**
** The row index is a size_t, matching edialign_recallouer_mat and
** edialign_liberer_mat; the previous ajint bound narrowed nb_lig.
**
** @param [r] t_elt [size_t] Size of one element in bytes
** @param [r] nb_lig [size_t] Number of rows
** @param [r] nb_col [size_t] Number of elements per row
** @return [void**] Array of nb_lig row pointers
*****************************************************************************/

static void** edialign_callouer_mat(size_t t_elt, size_t nb_lig, size_t nb_col)
{
    void **pointeur;
    size_t i;

    pointeur = (void **) edialign_allouer(nb_lig * sizeof(void *));

    for (i = 0; i < nb_lig; i++)
        pointeur[i] = (void *) edialign_allouer(nb_col * t_elt);

    return(pointeur);
}
8170
8171
8172
8173
/* @funcstatic edialign_recallouer_mat **************************************
**
** Changes the number of rows of a matrix built by edialign_callouer_mat.
** Nothing is done when the row count is unchanged. When shrinking, the
** rows nb_lig .. anc_nb_lig-1 are released before the pointer array is
** resized; when growing, the new rows anc_nb_lig .. nb_lig-1 are
** allocated with nb_col * t_elt bytes each. Existing rows are kept as
** they are.
**
** @param [u] pointeur [void**] Matrix to resize
** @param [r] t_elt [size_t] Size of one element in bytes
** @param [r] anc_nb_lig [size_t] Current number of rows
** @param [r] nb_lig [size_t] Requested number of rows
** @param [r] nb_col [size_t] Number of elements in each new row
** @return [void**] Resized array of row pointers
*****************************************************************************/

static void** edialign_recallouer_mat(void **pointeur, size_t t_elt,
                                      size_t anc_nb_lig,
                                      size_t nb_lig, size_t nb_col)
{
    size_t lig;

    if (nb_lig != anc_nb_lig)
    {
        /* rows beyond the new end go first (no-op when growing) */
        lig = nb_lig;
        while (lig < anc_nb_lig)
        {
            edialign_liberer(pointeur[lig]);
            lig++;
        }

        pointeur = (void **) edialign_reallouer(pointeur,
                                                nb_lig * sizeof(void *));

        /* fresh rows for the added tail (no-op when shrinking) */
        lig = anc_nb_lig;
        while (lig < nb_lig)
        {
            pointeur[lig] = (void *) edialign_allouer(nb_col * t_elt);
            lig++;
        }
    }

    return pointeur;
}
8205
8206
8207
8208
8209 #if 0
/* @funcstatic edialign_recallouer_mat2 **************************************
**
** Variant of edialign_recallouer_mat that also resizes the rows that
** are kept. It is compiled out by the surrounding "#if 0".
**
** Rows nb_lig .. anc_nb_lig-1 are released, the pointer array is resized
** to nb_lig entries, every surviving row is resized to nb_col * t_elt
** bytes, and rows anc_nb_lig .. nb_lig-1 are newly allocated.
**
** NOTE(review): the loop index is an ajint compared against size_t
** bounds.
**
** @param [u] pointeur [void**] Matrix to resize
** @param [r] t_elt [size_t] Size of one element in bytes
** @param [r] anc_nb_lig [size_t] Current number of rows
** @param [r] nb_lig [size_t] Requested number of rows
** @param [r] nb_col [size_t] Requested number of elements per row
** @return [void**] Resized array of row pointers
*****************************************************************************/

static void** edialign_recallouer_mat2(void **pointeur, size_t t_elt,
                                       size_t anc_nb_lig, size_t nb_lig,
                                       size_t nb_col)
{
    ajint i;

    /* release rows that fall off the end when shrinking */
    for (i=nb_lig; i < anc_nb_lig; i++)
        edialign_liberer(pointeur[i]);

    pointeur = (void **) edialign_reallouer(pointeur, nb_lig * sizeof(void *));

    /* resize the rows present both before and after */
    for (i=0; i < edmin(anc_nb_lig, nb_lig); i++)
        pointeur[i] = (void *) edialign_reallouer(pointeur[i], nb_col * t_elt);

    /* allocate the rows added when growing */
    for (i=anc_nb_lig; i < nb_lig; i++)
        pointeur[i] = (void *) edialign_allouer(nb_col * t_elt);

    return(pointeur);
}
8241 #endif
8242
8243
8244
8245
/* @funcstatic edialign_liberer_mat **************************************
**
** Releases a matrix: each of the nb_lig rows is passed to
** edialign_liberer, followed by the array of row pointers itself.
**
** @param [d] pointeur [void**] Matrix to release
** @param [r] nb_lig [size_t] Number of rows held by the matrix
** @return [void]
*****************************************************************************/

static void edialign_liberer_mat(void **pointeur, size_t nb_lig)
{
    size_t lig = 0;

    while (lig < nb_lig)
    {
        edialign_liberer(pointeur[lig]);
        lig++;
    }

    edialign_liberer(pointeur);
}
8266
8267
8268
8269
8270 #if 0
/* @funcstatic edialign_ouvrir **************************************
**
** fopen wrapper, unused. On failure the attempted call is printed to
** standard output and edialign_erreur is called.
**
** The error text previously read "enable to open file"; it now reads
** "unable to open file".
**
** @param [u] nomfich [char*] Name of the file to open
** @param [u] mode [char*] fopen mode string
** @return [FILE*] Stream returned by fopen
*****************************************************************************/

static FILE* edialign_ouvrir(char *nomfich, char *mode)
{
    FILE *f;

    if ((f = fopen(nomfich, mode)) == NULL)
    {
        printf("fopen(\"%s\",\"%s\"): ", nomfich, mode);
        edialign_erreur("unable to open file");
    }

    return f;
}
8292
8293
8294
8295
/* @funcstatic edialign_fermer **************************************
**
** fclose wrapper, unused. Calls edialign_erreur when fclose reports EOF.
**
** The error text previously read "enable to close file"; it now reads
** "unable to close file".
**
** @param [d] f [FILE*] Stream to close
** @return [void]
*****************************************************************************/

static void edialign_fermer(FILE *f)
{
    if (fclose(f) == EOF)
        edialign_erreur("unable to close file");

    return;
}
8311
8312
8313
8314
8315 /* @funcstatic edialign_fcopie **************************************
8316 **
8317 ** edialign_fcopie. unused
8318 **
8319 ** @param [u] fdestination [FILE*] Undocumented
8320 ** @param [u] fsource [FILE*] Undocumented
8321 ** @return [void]
8322 *****************************************************************************/
8323
8324 static void edialign_fcopie(FILE *fdestination, FILE *fsource)
8325 {
8326 char line[TAILLE_MAX_LIGNE_FICHIER];
8327
8328 while (fgets(line, TAILLE_MAX_LIGNE_FICHIER, fsource) != NULL)
8329 fputs(line, fdestination);
8330
8331 fflush(fdestination);
8332 return;
8333 }
8334
8335
8336
8337
/* @funcstatic edialign_strmin **************************************
**
** Converts a NUL-terminated string to lower case in place, unused.
**
** Each character is converted to unsigned char before being handed to
** tolower(); passing a negative plain char to the <ctype.h> functions
** is undefined.
**
** @param [u] p [char*] String to convert
** @return [void]
*****************************************************************************/

static void edialign_strmin(char *p)
{
    for (; *p != '\0'; p++)
        *p = (char) tolower((unsigned char) *p);

    return;
}
8355
8356
8357
8358
/* @funcstatic edialign_strmax **************************************
**
** Converts a NUL-terminated string to upper case in place, unused.
**
** Each character is converted to unsigned char before being handed to
** toupper(); passing a negative plain char to the <ctype.h> functions
** is undefined.
**
** @param [u] p [char*] String to convert
** @return [void]
*****************************************************************************/

static void edialign_strmax(char *p)
{
    for (; *p != '\0'; p++)
        *p = (char) toupper((unsigned char) *p);

    return;
}
8376 #endif
8377
8378
8379
8380
8381 /* @funcstatic edialign_regex_complain **************************************
8382 **
8383 ** edialign_regex_complain
8384 **
8385 ** @param [r] regex [const char*] Undocumented
8386 ** @return [void]
8387 *****************************************************************************/
8388
edialign_regex_complain(const char * regex)8389 __noreturn static void edialign_regex_complain( const char *regex )
8390 {
8391 printf("\n bracket structure in regular expression makes no sense \n");
8392 printf("\n %s \n\n", regex) ;
8393 printf(" program terminated\n\n");
8394 embExitBad();
8395 }
8396
8397
8398
8399
8400 /* @funcstatic edialign_struc_check **************************************
8401 **
8402 ** edialign_struc_check
8403 **
8404 ** @param [u] regex [char*] Undocumented
8405 ** @return [void]
8406 *****************************************************************************/
8407
edialign_struc_check(char * regex)8408 static void edialign_struc_check( char *regex )
8409 {
8410 ajint p;
8411 ajint bracket_count = 0;
8412
8413 for( p = 0 ; p < MAX_REGEX; p++ )
8414 {
8415 char_num[ p ] = 0 ;
8416 }
8417
8418 for( p = 0 ; p < regex_len ; p++ )
8419 {
8420
8421 if( regex[ p ] == '[' )
8422 bracket_count++ ;
8423
8424 if( ( regex[ p ] != '[' ) && ( regex[ p ] != ']' ) )
8425 {
8426 char_num[ mot_len ]++ ;
8427 regex[ p ] = toupper((int) regex[ p ] ) ;
8428 }
8429
8430 if( regex[ p ] == ']' )
8431 bracket_count-- ;
8432
8433 if( ( regex[ p ] == ']' ) || ( bracket_count == 0 ) )
8434 mot_len++ ;
8435
8436
8437 if( ( bracket_count < 0 ) || ( bracket_count > 1 ) )
8438 edialign_regex_complain( regex ) ;
8439
8440
8441 }
8442
8443 if( bracket_count != 0 )
8444 edialign_regex_complain( regex ) ;
8445
8446 return;
8447 }
8448
8449
8450
8451
8452 /* @funcstatic edialign_regex_parse *****************************************
8453 **
8454 ** edialign_regex_parse
8455 **
8456 ** @param [u] mot_regex [char*] Undocumented
8457 ** @return [void]
8458 *****************************************************************************/
8459
edialign_regex_parse(char * mot_regex)8460 static void edialign_regex_parse(char *mot_regex)
8461 {
8462 ajuint i;
8463 ajint p;
8464 ajint mp = 0;
8465 ajint in_bracket = 0;
8466 ajint char_c = 0 ;
8467
8468
8469 if((mot_pos = ( short ** ) calloc( seqnum , sizeof( short *) ) ) == NULL)
8470 {
8471 printf(" problems with memory allocation");
8472 printf(" for `mot_pos' ! \n \n");
8473 embExitBad();
8474 }
8475
8476 for(i = 0; i < seqnum; i++)
8477 if((mot_pos[i] = ( short *) calloc((seqlen[i] + 2),
8478 sizeof(short))) == NULL)
8479 {
8480 printf(" problems with memory allocation");
8481 printf(" for `mot_pos[%u]' ! \n \n", i);
8482 embExitBad();
8483 }
8484
8485
8486
8487
8488 edialign_struc_check( mot_regex );
8489
8490 /*
8491 printf(" \n regex_len = %d\n", regex_len) ;
8492 printf(" mot_len = %d\n", mot_len) ;
8493 printf("\n");
8494
8495 for( p = 0 ; p < mot_len ; p++ ) {
8496 printf(" %d ", char_num[ p ] );
8497 }
8498 printf("\n\n");
8499 */
8500
8501 for( p = 0 ; p < mot_len ; p++ )
8502 {
8503 mot_char[p] = (char *) calloc(char_num[p], sizeof(char));
8504 }
8505
8506
8507 /* PROBLEM */
8508
8509
8510 for( p = 0 ; p < regex_len ; p++ )
8511 {
8512 if( mot_regex[ p ] == '[' )
8513 {
8514 in_bracket = 1 ;
8515 }
8516
8517 if( mot_regex[ p ] == ']' )
8518 {
8519 in_bracket = 0 ;
8520 char_c = 0 ;
8521 mp++ ;
8522 }
8523
8524 if( ( mot_regex[ p ] != '[' ) && ( mot_regex[ p ] != ']' ) )
8525 { /* char */
8526 if( in_bracket )
8527 {
8528 mot_char[ mp ][ char_c ] = mot_regex[ p ] ;
8529 char_c++;
8530 }
8531 else
8532 { /* not in bracket */
8533 char_c = 0 ;
8534 mot_char[ mp ][ 0 ] = mot_regex[ p ] ;
8535 mp++ ;
8536 }
8537 }
8538 }
8539
8540 /*
8541 for( mp = 0 ; mp < mot_len ; mp++ ) {
8542 printf(" position %d ", mp + 1 );
8543 for( p = 0 ; p < char_num[ mp ] ; p++ ) {
8544 printf(" %c ", mot_char[ mp ][ p ] ) ;
8545 }
8546 printf("\n");
8547 }
8548 */
8549
8550 return;
8551 }
8552
8553
8554
8555
8556 /* @funcstatic edialign_seq_parse *****************************************
8557 **
8558 ** edialign_seq_parse
8559 **
8560 ** @param [u] mot_regex_unused [char*] Undocumented
8561 ** @return [void]
8562 *****************************************************************************/
8563
edialign_seq_parse(char * mot_regex_unused)8564 static void edialign_seq_parse(char *mot_regex_unused)
8565 {
8566 ajuint sn;
8567 ajint ok;
8568 ajint sp;
8569 ajint rp;
8570 ajint hv;
8571 ajint match = 0;
8572
8573 (void) mot_regex_unused; /* make it used */
8574
8575 max_mot_offset = (float)
8576 (sqrt(-log(0.1) * 10 / mot_factor) * mot_offset_factor);
8577
8578
8579 for(sn = 0 ; sn < seqnum ; sn++)
8580 for(sp = 0 ; sp < ( seqlen[ sn ] - mot_len + 1 ) ; sp++)
8581 {
8582 ok = 1 ;
8583 rp = 0 ;
8584 while( ok && ( rp < mot_len))
8585 {
8586 if(mot_char[ rp ][ 0 ] != 'X')
8587 {
8588 match = 0 ;
8589 for(hv = 0 ; hv < char_num[ rp ] ; hv++)
8590 {
8591 if(mot_char[rp][hv] == seq[sn][sp + rp])
8592 {
8593 match = 1 ;
8594 }
8595 }
8596 }
8597 ok = match;
8598 rp++;
8599 }
8600
8601 if( ok )
8602 {
8603 printf( " motif in seq %u at pos %d \n",sn + 1 ,sp + 1);
8604 mot_pos[ sn ][ sp + 1 ] = 1 ;
8605 }
8606 else
8607 mot_pos[ sn ][ sp + 1 ] = 0 ;
8608 }
8609
8610
8611 printf("\n") ;
8612
8613 /*
8614 for( sn = 0 ; sn < seqnum ; sn++ ) {
8615 printf(" %s \n", seq[ sn ] ) ;
8616 printf(" ");
8617 for( i = 1 ; i <= seqlen[ sn ] ; i++ ) {
8618
8619
8620 if( mot_pos[ sn ][ i ] )
8621 printf("*");
8622 else
8623 printf(" ");
8624 }
8625 printf("\n\n" ) ;
8626 }
8627 printf("\n" ) ;
8628 */
8629
8630 return;
8631 }
8632
8633
8634
8635
8636 #if 0
/* @funcstatic edialign_regex_format_complain *******************************
**
** Prints the usage text for the motif-search (-mot) option to standard
** output and ends the program through embExitBad(). It is compiled out
** by the surrounding "#if 0".
**
** @return [void]
*****************************************************************************/

static void edialign_regex_format_complain(void)
{
    printf("\n \n Arguments in command line don't make sense! \n");
    printf(" (Motifs not properly specified) \n \n");
    printf(" With the motif-search option, the program call is:\n\n");
    printf(" ./dialign2-2 [para] -mot <regex> <fct1> <fct2> ");
    printf("[para] <seq> \n\n");
    printf(" where \n <regex> is a regular expression,");
    printf(" e.g. \"AT[CG]XT\",\n");
    printf(" <fct1> is a weighting factor \n");
    printf(" <fct2> is a weighting factor \n");
    printf(" <seq> is the input sequence file and \n");
    printf(" [para] are (optional)");
    printf(" additional program parameters\n\n" );
    embExitBad();
}
8660 #endif
8661
8662
8663
8664
8665 /* @funcstatic edialign_mot_dist_factor *************************************
8666 **
8667 ** edialign_mot_dist_factor
8668 **
8669 ** @param [r] offset [ajint] Undocumented
8670 ** @param [r] parameter [float] Undocumented
8671 ** @return [float] Undocumented
8672 *****************************************************************************/
8673
edialign_mot_dist_factor(ajint offset,float parameter)8674 static float edialign_mot_dist_factor(ajint offset , float parameter)
8675 {
8676 float mdf , parameter2;
8677 ajint offset2 ;
8678
8679 offset2 = offset * offset ;
8680 parameter2 = parameter * parameter ;
8681
8682 /* factor1 = (float) offset2 / (parameter2 * 10); */
8683 mdf = (float) (exp(-(offset2) / (parameter2 * 10)));
8684
8685 return mdf ;
8686 }
8687
8688
8689
8690
8691 /* @funcstatic edialign_rel_wgt_calc *************************************
8692 **
8693 ** edialign_rel_wgt_calc
8694 **
8695 ** @param [r] l1 [ajint] Undocumented
8696 ** @param [r] l2 [ajint] Undocumented
8697 ** @param [u] rel_wgt [float**] Undocumented
8698 ** @return [void]
8699 *****************************************************************************/
8700
edialign_rel_wgt_calc(ajint l1,ajint l2,float ** rel_wgt)8701 static void edialign_rel_wgt_calc(ajint l1, ajint l2, float **rel_wgt)
8702 {
8703 ajint l;
8704 ajint m;
8705 ajint mss = 0;
8706 float ent;
8707 float factor;
8708 float l1f;
8709 float l2f;
8710 float av_sim_score = 0.;
8711 double t_pr;
8712 double pr400;
8713 double **tpr = NULL;
8714
8715 /*
8716 printf(" it %d, rel_wgt_calc: len = %d , %d \n", istep , l1 , l2 );
8717 */
8718
8719 if( rel_wgt == wgt_prot )
8720 {
8721 tpr = tp400_prot ;
8722 mss = max_sim_score ;
8723 av_sim_score = av_sim_score_pep ;
8724 }
8725
8726 if( rel_wgt == wgt_dna )
8727 {
8728 tpr = tp400_dna ;
8729 mss = 1 ;
8730 av_sim_score = av_sim_score_nuc ;
8731 }
8732
8733 if( rel_wgt == wgt_trans )
8734 {
8735 tpr = tp400_trans ;
8736 mss = max_sim_score ;
8737 av_sim_score = av_sim_score_pep ;
8738 }
8739
8740
8741
8742 l1f = (float) l1;
8743 l2f = (float) l2;
8744
8745 factor = ( l1f * l2f ) / (float) 400.00;
8746
8747
8748 for( l = 1 ; l <= max_dia; l++ )
8749 for( m = 0 ; m <= l * mss ; m++ )
8750 {
8751 rel_wgt[l][m] = 0;
8752
8753
8754 if( tpr[l][m] )
8755 if( m > av_sim_score * l )
8756
8757 {
8758 pr400 = tpr[l][m];
8759
8760 if( pr400 > 0.0000000001 )
8761 t_pr = 1 - pow( 1 - pr400 , factor );
8762 else
8763 t_pr = pr400 * factor;
8764
8765 ent = 0;
8766
8767 if(t_pr)
8768 ent = (float) -log( t_pr );
8769
8770 if( ent > threshold )
8771 rel_wgt[l][m] = ent;
8772 }
8773 }
8774
8775 return;
8776 }
8777
8778
8779
8780
8781 /* @funcstatic edialign_wgt_prnt_prot *************************************
8782 **
8783 ** edialign_wgt_prnt_prot
8784 **
8785 ** @return [void]
8786 *****************************************************************************/
8787
edialign_wgt_prnt_prot(void)8788 static void edialign_wgt_prnt_prot(void)
8789 {
8790 ajint i;
8791 ajint j;
8792
8793 printf(" \n\n weight scores for PROTEIN fragments\n\n" );
8794 printf(" sequence lengths = %d , %d \n\n", seqlen[0] , seqlen[1] ) ;
8795 for( i = 1 ; i <= max_dia ; i++ ) {
8796 for( j = 0 ; j <= ( i * 15 ) ; j++ )
8797 printf(" %3d %3d %f \n", i , j , wgt_prot[ i ][ j ] );
8798 }
8799
8800 return;
8801 }
8802
8803
8804
8805
8806 /* @funcstatic edialign_wgt_prnt_dna *************************************
8807 **
8808 ** edialign_wgt_prnt_dna
8809 **
8810 ** @return [void]
8811 *****************************************************************************/
8812
edialign_wgt_prnt_dna(void)8813 static void edialign_wgt_prnt_dna(void)
8814 {
8815 ajint i;
8816 ajint j;
8817
8818 printf(" \n\n weight scores for NON-TRANSLATED DNA fragments\n\n" );
8819 printf(" sequence lengths = %d , %d \n\n", seqlen[0] , seqlen[1] ) ;
8820 for( i = 1 ; i <= max_dia ; i++ )
8821 {
8822 for( j = 0 ; j <= i ; j++ )
8823 printf(" %3d %3d %f \n", i , j , wgt_dna[ i ][ j ] );
8824 }
8825
8826 return;
8827 }
8828
8829
8830
8831
8832 /* @funcstatic edialign_wgt_prnt_trans *************************************
8833 **
8834 ** edialign_wgt_prnt_trans
8835 **
8836 ** @return [void]
8837 *****************************************************************************/
8838
edialign_wgt_prnt_trans(void)8839 static void edialign_wgt_prnt_trans(void)
8840 {
8841 ajint i;
8842 ajint j;
8843
8844 printf(" \n\n weight scores for TRANSLATED DNA fragments\n\n" );
8845 printf(" sequence lengths = %d , %d \n\n", seqlen[0] , seqlen[1] ) ;
8846 for( i = 1 ; i <= max_dia ; i++ ) {
8847 for( j = 0 ; j <= ( i * 15 ) ; j++ )
8848 printf(" %3d %3d %f \n", i , j , wgt_trans[ i ][ j ] );
8849 }
8850
8851 return;
8852 }
8853
8854
8855
8856
8857 /* @funcstatic edialign_wgt_prnt *************************************
8858 **
8859 ** edialign_wgt_prnt
8860 **
8861 ** @return [void]
8862 *****************************************************************************/
8863
edialign_wgt_prnt(void)8864 static void edialign_wgt_prnt(void)
8865 {
8866 if (wgt_type == 0 )
8867 edialign_wgt_prnt_prot( );
8868
8869 if (wgt_type % 2 )
8870 edialign_wgt_prnt_dna( );
8871
8872 if (wgt_type > 1 )
8873 edialign_wgt_prnt_trans( );
8874
8875 return;
8876 }
8877
8878
8879
8880
8881 /* @funcstatic edialign_mem_alloc *************************************
8882 **
8883 ** edialign_mem_alloc
8884 **
8885 ** @return [void]
8886 *****************************************************************************/
8887
edialign_mem_alloc(void)8888 static void edialign_mem_alloc(void)
8889 {
8890 /* allocates memory for `tp400_xxx', `wgt_xxx' */
8891
8892 ajint i;
8893
8894 if( wgt_type == 0 )
8895 {
8896 if( (tp400_prot = (double **) calloc((max_dia + 1),sizeof(double*)))
8897 == NULL)
8898 {
8899 printf(" problems with memory allocation for `tp400_prot' ! "
8900 "\n \n");
8901 embExitBad();
8902 }
8903
8904 if( ( wgt_prot = (float **) calloc( (max_dia+1) , sizeof(float*) ))
8905 == NULL)
8906 {
8907 printf(" problems with memory allocation for `weights' ! \n \n");
8908 embExitBad();
8909 }
8910 }
8911
8912 if( wgt_type % 2 )
8913 {
8914 if( (tp400_dna = (double **) calloc( ( max_dia + 1 ) ,
8915 sizeof(double*)))
8916 == NULL)
8917 {
8918 printf(" problems with memory allocation for `tp400_dna' ! "
8919 "\n \n");
8920 embExitBad();
8921 }
8922
8923 if( ( wgt_dna = (float **) calloc( (max_dia+1) , sizeof(float*) ))
8924 == NULL)
8925 {
8926 printf(" problems with memory allocation for `weights' ! \n \n");
8927 embExitBad();
8928 }
8929 }
8930
8931
8932 if( wgt_type > 1 )
8933 {
8934 if( (tp400_trans = (double **) calloc( ( max_dia + 1 ) ,
8935 sizeof(double*) ))
8936 == NULL)
8937 {
8938 printf(" problems with memory allocation for `tp400_trans' ! "
8939 "\n \n");
8940 embExitBad();
8941 }
8942
8943 if( ( wgt_trans = (float **) calloc( (max_dia+1) , sizeof(float*) ))
8944 == NULL)
8945 {
8946 printf(" problems with memory allocation for `weights' ! \n \n");
8947 embExitBad();
8948 }
8949 }
8950
8951 for( i = 1 ; i <= max_dia ; i++ )
8952 {
8953
8954
8955 if( wgt_type == 0 )
8956 {
8957 if( (tp400_prot[i] =
8958 (double *) calloc(((i + 1) * max_sim_score),sizeof(double)))
8959 == NULL)
8960 {
8961 printf(" problems with memory allocation for `tp400_prot' ! "
8962 "\n \n");
8963 embExitBad();
8964 }
8965
8966 if( (wgt_prot[i] =
8967 (float *) calloc( ((i+1) * max_sim_score ) , sizeof(float) ))
8968 == NULL)
8969 {
8970 printf(" problems with memory allocation for `weights'!\n\n");
8971 embExitBad();
8972 }
8973 }
8974
8975
8976 if( wgt_type % 2 )
8977 {
8978 if( (tp400_dna[i] =
8979 (double *) calloc( ((i + 1) ) , sizeof(double) ))
8980 == NULL)
8981 {
8982 printf(" problems with memory allocation for `tp400_dna' !"
8983 "\n \n");
8984 embExitBad();
8985 }
8986
8987 if( (wgt_dna[i] =
8988 (float *) calloc( ((i+1) ) , sizeof(float) ))
8989 == NULL)
8990 {
8991 printf(" problems with memory allocation for `weights'!\n\n");
8992 embExitBad();
8993 }
8994 }
8995
8996
8997 if( wgt_type > 1 )
8998 {
8999 if( (tp400_trans[i] =
9000 (double *) calloc(((i + 1) * max_sim_score),sizeof(double)))
9001 == NULL)
9002 {
9003 printf(" problems with memory allocation for `tp400_trans' "
9004 "%d ! \n \n", i);
9005 embExitBad();
9006 }
9007
9008 if( (wgt_trans[i] =
9009 (float *) calloc( ((i+1) * max_sim_score ) , sizeof(float) ))
9010 == NULL)
9011 {
9012 printf(" problems with memory allocation for `weights'!\n\n");
9013 embExitBad();
9014 }
9015 }
9016 }
9017
9018 return;
9019 }
9020