1 #include "mltaln.h"
2
3 #define DEBUG 0
4
5 #if 0
6 int seqlen( char *seq )
7 {
8 int val = 0;
9 while( *seq )
10 if( *seq++ != '-' ) val++;
11 return( val );
12 }
13 #else
seqlen(char * seq)14 int seqlen( char *seq )
15 {
16 int val = 0;
17 if( *newgapstr == '-' )
18 {
19 while( *seq )
20 if( *seq++ != '-' ) val++;
21 }
22 else
23 {
24 while( *seq )
25 {
26 if( *seq != '-' && *seq != *newgapstr ) val++;
27 seq++;
28 }
29 }
30 return( val );
31 }
32 #endif
33
/*
 * intlen - length of an int list that is terminated by the sentinel -1.
 *
 * Returns the number of entries in `num` that precede the first -1.
 * The list must contain a -1; the scan does not stop otherwise.
 */
int intlen( int *num )
{
    int count;

    for( count=0; num[count] != -1; count++ )
        ;
    return( count );
}
40
seqcheck(char ** seq)41 char seqcheck( char **seq )
42 {
43 int i, len;
44 char **seqbk = seq;
45 while( *seq )
46 {
47 len = strlen( *seq );
48 for( i=0; i<len; i++ )
49 {
50 if( amino_n[(int)(*seq)[i]] == -1 )
51 {
52
53 reporterr( "========================================================================= \n" );
54 reporterr( "========================================================================= \n" );
55 reporterr( "=== \n" );
56 reporterr( "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
57 reporterr( "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
58 reporterr( "=== \n" );
59 reporterr( "=== To make an alignment having unusual characters (U, @, #, etc), try\n" );
60 reporterr( "=== %% mafft --anysymbol input > output\n" );
61 reporterr( "=== \n" );
62 reporterr( "========================================================================= \n" );
63 reporterr( "========================================================================= \n" );
64 return( (int)(*seq)[i] );
65 }
66 }
67 seq++;
68 }
69 return( 0 );
70 }
71
scmx_calc(int icyc,char ** aseq,double * effarr,double ** scmx)72 void scmx_calc( int icyc, char **aseq, double *effarr, double **scmx )
73 {
74 int i, j, lgth;
75
76 lgth = strlen( aseq[0] );
77 for( j=0; j<lgth; j++ )
78 {
79 for( i=0; i<nalphabets; i++ )
80 {
81 scmx[i][j] = 0;
82 }
83 }
84 for( i=0; i<icyc+1; i++ )
85 {
86 int id;
87 id = amino_n[(int)aseq[i][0]];
88 scmx[id][0] += (double)effarr[i];
89 }
90 for( j=1; j<lgth-1; j++ )
91 {
92 for( i=0; i<icyc+1; i++ )
93 {
94 int id;
95 id = amino_n[(int)aseq[i][j]];
96 scmx[id][j] += (double)effarr[i];
97 }
98 }
99 for( i=0; i<icyc+1; i++ )
100 {
101 int id;
102 id = amino_n[(int)aseq[i][lgth-1]];
103 scmx[id][lgth-1] += (double)effarr[i];
104 }
105 }
106
/*
 * exitall - report a message followed by a newline through reporterr()
 * and terminate the process with exit status 1.  Does not return.
 */
void exitall( char arr[] )
{
    const char *msg = arr;

    reporterr( "%s\n", msg );
    exit( 1 );
}
112
display(char ** seq,int nseq)113 void display( char **seq, int nseq )
114 {
115 int i, imax;
116 char b[121];
117
118 if( !disp ) return;
119 if( nseq > DISPSEQF ) imax = DISPSEQF;
120 else imax = nseq;
121 reporterr( " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
122 for( i=0; i<+imax; i++ )
123 {
124 strncpy( b, seq[i]+DISPSITEI, 120 );
125 b[120] = 0;
126 reporterr( "%3d %s\n", i+1, b );
127 }
128 }
#if 0
/*
 * Disabled legacy variant of intergroup_score() that returns the score
 * instead of storing it through a pointer.  Kept for reference only; it is
 * never compiled.
 */
double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
{
    int i, j, k;
    double total = 0.0;
    double pairscore;
    double weight;
    char *row1, *row2, *gapped;
    char xxx[100];

    for( i=0; i<clus1; i++ )
    {
        for( j=0; j<clus2; j++ )
        {
            weight = eff1[i] * eff2[j];
            row1 = seq1[i];
            row2 = seq2[j];
            pairscore = 0.0;
            for( k=0; k<len; k++ )
            {
                if( row1[k] == '-' && row2[k] == '-' ) continue;
                pairscore += amino_dis[(int)row1[k]][(int)row2[k]];

                if( row1[k] == '-' ) gapped = row1;
                else if( row2[k] == '-' ) gapped = row2;
                else continue;

                /* gap opening: penalty plus the column entry once more */
                pairscore += penalty;
                pairscore += amino_dis[(int)row1[k]][(int)row2[k]];
                while( gapped[++k] == '-' )
                    pairscore += amino_dis[(int)row1[k]][(int)row2[k]];
                k--;
                if( k > len-2 ) break;
            }
            total += (double)pairscore * weight;
#if 1
            sprintf( xxx, "%f", total );
#endif
        }
    }
    return( total );
}
#endif
190
intergroup_score_consweight(char ** seq1,char ** seq2,double * eff1,double * eff2,int clus1,int clus2,int len,double * value)191 void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
192 {
193 int i, j, k;
194 int len2 = len - 2;
195 int ms1, ms2;
196 double tmpscore;
197 char *mseq1, *mseq2;
198 double efficient;
199
200 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
201 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
202
203
204
205 *value = 0.0;
206 for( i=0; i<clus1; i++ )
207 {
208 for( j=0; j<clus2; j++ )
209 {
210 efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
211 mseq1 = seq1[i];
212 mseq2 = seq2[j];
213 tmpscore = 0.0;
214 for( k=0; k<len; k++ )
215 {
216 ms1 = (int)mseq1[k];
217 ms2 = (int)mseq2[k];
218 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
219 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
220
221 if( ms1 == (int)'-' )
222 {
223 tmpscore += (double)penalty;
224 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
225 while( (ms1=(int)mseq1[++k]) == (int)'-' )
226 ;
227 // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
228 k--;
229 if( k >len2 ) break;
230 continue;
231 }
232 if( ms2 == (int)'-' )
233 {
234 tmpscore += (double)penalty;
235 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
236 while( (ms2=(int)mseq2[++k]) == (int)'-' )
237 ;
238 // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
239 k--;
240 if( k > len2 ) break;
241 continue;
242 }
243 }
244 *value += (double)tmpscore * (double)efficient;
245 // reporterr( "val in _gapnomi = %f\n", *value );
246 }
247 }
248 #if 0
249 fprintf( stdout, "###score = %f\n", score );
250 #endif
251 #if DEBUG
252 reporterr( "score in intergroup_score = %f\n", score );
253 #endif
254 // return( score );
255 }
intergroup_score_gapnomi(char ** seq1,char ** seq2,double * eff1,double * eff2,int clus1,int clus2,int len,double * value)256 void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
257 {
258 int i, j, k;
259 int len2 = len - 2;
260 int ms1, ms2;
261 double tmpscore;
262 char *mseq1, *mseq2;
263 double efficient;
264
265 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
266 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
267
268
269
270 *value = 0.0;
271 for( i=0; i<clus1; i++ )
272 {
273 for( j=0; j<clus2; j++ )
274 {
275 efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
276 mseq1 = seq1[i];
277 mseq2 = seq2[j];
278 tmpscore = 0.0;
279 for( k=0; k<len; k++ )
280 {
281 ms1 = (int)mseq1[k];
282 ms2 = (int)mseq2[k];
283 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
284 // tmpscore += (double)amino_dis[ms1][ms2];
285
286 if( ms1 == (int)'-' )
287 {
288 tmpscore += (double)penalty;
289 // tmpscore += (double)amino_dis[ms1][ms2];
290 while( (ms1=(int)mseq1[++k]) == (int)'-' )
291 ;
292 // tmpscore += (double)amino_dis[ms1][ms2];
293 k--;
294 if( k >len2 ) break;
295 continue;
296 }
297 if( ms2 == (int)'-' )
298 {
299 tmpscore += (double)penalty;
300 // tmpscore += (double)amino_dis[ms1][ms2];
301 while( (ms2=(int)mseq2[++k]) == (int)'-' )
302 ;
303 // tmpscore += (double)amino_dis[ms1][ms2];
304 k--;
305 if( k > len2 ) break;
306 continue;
307 }
308 }
309 *value += (double)tmpscore * (double)efficient;
310 // reporterr( "val in _gapnomi = %f\n", *value );
311 }
312 }
313 #if 0
314 fprintf( stdout, "###score = %f\n", score );
315 #endif
316 #if DEBUG
317 reporterr( "score in intergroup_score = %f\n", score );
318 #endif
319 // return( score );
320 }
321
intergroup_score_multimtx(int ** whichmtx,double *** scoringmatrices,char ** seq1,char ** seq2,double * eff1,double * eff2,int clus1,int clus2,int len,double * value)322 void intergroup_score_multimtx( int **whichmtx, double ***scoringmatrices, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
323 {
324 int i, j, k, c;
325 int len2 = len - 2;
326 int mn1, mn2;
327 double tmpscore;
328 char *mseq1, *mseq2;
329 double efficient;
330 int gapnum = amino_n['-'];
331
332 double gaptmpscore;
333 double gapscore = 0.0;
334
335 // reporterr( "#### in intergroup_score\n" );
336
337 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
338 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
339
340 // reporterr( "\n intergroup_score_multimtx ..." );
341 *value = 0.0;
342 for( i=0; i<clus1; i++ )
343 {
344 for( j=0; j<clus2; j++ )
345 {
346 efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
347 c = whichmtx[i][j];
348 mseq1 = seq1[i];
349 mseq2 = seq2[j];
350 tmpscore = 0.0;
351 gaptmpscore = 0.0;
352 for( k=0; k<len; k++ )
353 {
354 mn1 = amino_n[(int)(mseq1[k])];
355 mn2 = amino_n[(int)(mseq2[k])];
356 if( mn1 == gapnum && mn2 == gapnum ) continue;
357 tmpscore += (double)scoringmatrices[c][mn1][mn2];
358 // tmpscore += (double)scoringmtx[mn1][mn2];
359
360 if( mn1 == gapnum )
361 {
362 tmpscore += (double)penalty;
363 gaptmpscore += (double)penalty;
364 // tmpscore += (double)scoringmtx[mn1][mn2];
365 tmpscore += (double)scoringmatrices[c][mn1][mn2];
366 while( (mn1=amino_n[(int)mseq1[++k]]) == gapnum )
367 tmpscore += (double)scoringmatrices[c][mn1][mn2];
368 // tmpscore += (double)scoringmtx[mn1][mn2];
369 k--;
370 if( k >len2 ) break;
371 continue;
372 }
373 if( mn2 == gapnum )
374 {
375 tmpscore += (double)penalty;
376 gaptmpscore += (double)penalty;
377 tmpscore += (double)scoringmatrices[c][mn1][mn2];
378 // tmpscore += (double)scoringmtx[mn1][mn2];
379 while( (mn2=amino_n[(int)mseq2[++k]]) == gapnum )
380 tmpscore += (double)scoringmatrices[c][mn1][mn2];
381 // tmpscore += (double)scoringmtx[mn1][mn2];
382 k--;
383 if( k > len2 ) break;
384 continue;
385 }
386 }
387 *value += (double)tmpscore * (double)efficient;
388 gapscore += (double)gaptmpscore * (double)efficient;
389 }
390 }
391 // reporterr( "done." );
392 #if 0
393 reporterr( "###gapscore = %f\n", gapscore );
394 #endif
395 #if DEBUG
396 reporterr( "score in intergroup_score = %f\n", score );
397 #endif
398 // return( score );
399 }
intergroup_score_dynmtx(double ** offsetmtx,int scoringmtx[0x80][0x80],char ** seq1,char ** seq2,double * eff1,double * eff2,int clus1,int clus2,int len,double * value)400 void intergroup_score_dynmtx( double **offsetmtx, int scoringmtx[0x80][0x80], char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
401 {
402 int i, j, k;
403 int len2 = len - 2;
404 int ms1, ms2;
405 double tmpscore;
406 char *mseq1, *mseq2;
407 double efficient;
408
409 double gaptmpscore;
410 double gapscore = 0.0;
411
412 // reporterr( "#### in intergroup_score\n" );
413
414 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
415 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
416
417 reporterr( "\n intergroup_score_dynmtx ..." );
418 *value = 0.0;
419 for( i=0; i<clus1; i++ )
420 {
421 for( j=0; j<clus2; j++ )
422 {
423 efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
424 mseq1 = seq1[i];
425 mseq2 = seq2[j];
426 tmpscore = 0.0;
427 gaptmpscore = 0.0;
428 for( k=0; k<len; k++ )
429 {
430 ms1 = (int)mseq1[k];
431 ms2 = (int)mseq2[k];
432 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
433 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
434 // tmpscore += (double)scoringmtx[ms1][ms2];
435
436 if( ms1 == (int)'-' )
437 {
438 tmpscore += (double)penalty;
439 gaptmpscore += (double)penalty;
440 // tmpscore += (double)scoringmtx[ms1][ms2];
441 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;;
442 while( (ms1=(int)mseq1[++k]) == (int)'-' )
443 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
444 // tmpscore += (double)scoringmtx[ms1][ms2];
445 k--;
446 if( k >len2 ) break;
447 continue;
448 }
449 if( ms2 == (int)'-' )
450 {
451 tmpscore += (double)penalty;
452 gaptmpscore += (double)penalty;
453 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
454 // tmpscore += (double)scoringmtx[ms1][ms2];
455 while( (ms2=(int)mseq2[++k]) == (int)'-' )
456 tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
457 // tmpscore += (double)scoringmtx[ms1][ms2];
458 k--;
459 if( k > len2 ) break;
460 continue;
461 }
462 }
463 *value += (double)tmpscore * (double)efficient;
464 gapscore += (double)gaptmpscore * (double)efficient;
465 }
466 }
467 reporterr( "done." );
468 #if 0
469 reporterr( "###gapscore = %f\n", gapscore );
470 #endif
471 #if DEBUG
472 reporterr( "score in intergroup_score = %f\n", score );
473 #endif
474 // return( score );
475 }
intergroup_score(char ** seq1,char ** seq2,double * eff1,double * eff2,int clus1,int clus2,int len,double * value)476 void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
477 {
478 int i, j, k;
479 int len2 = len - 2;
480 int ms1, ms2;
481 double tmpscore;
482 char *mseq1, *mseq2;
483 double efficient;
484
485 double gaptmpscore;
486 double gapscore = 0.0;
487
488 // reporterr( "#### in intergroup_score\n" );
489
490 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
491 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
492
493 *value = 0.0;
494 for( i=0; i<clus1; i++ )
495 {
496 for( j=0; j<clus2; j++ )
497 {
498 efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
499 mseq1 = seq1[i];
500 mseq2 = seq2[j];
501 tmpscore = 0.0;
502 gaptmpscore = 0.0;
503 for( k=0; k<len; k++ )
504 {
505 ms1 = (int)mseq1[k];
506 ms2 = (int)mseq2[k];
507 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
508 // tmpscore += (double)amino_dis[ms1][ms2];
509 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
510
511 if( ms1 == (int)'-' )
512 {
513 tmpscore += (double)penalty;
514 gaptmpscore += (double)penalty;
515 // tmpscore += (double)amino_dis[ms1][ms2];
516 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
517 while( (ms1=(int)mseq1[++k]) == (int)'-' )
518 // tmpscore += (double)amino_dis[ms1][ms2];
519 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
520 k--;
521 if( k >len2 ) break;
522 continue;
523 }
524 if( ms2 == (int)'-' )
525 {
526 tmpscore += (double)penalty;
527 gaptmpscore += (double)penalty;
528 // tmpscore += (double)amino_dis[ms1][ms2];
529 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
530 while( (ms2=(int)mseq2[++k]) == (int)'-' )
531 // tmpscore += (double)amino_dis[ms1][ms2];
532 tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
533 k--;
534 if( k > len2 ) break;
535 continue;
536 }
537 }
538 *value += (double)tmpscore * (double)efficient;
539 gapscore += (double)gaptmpscore * (double)efficient;
540 }
541 }
542 #if 0
543 reporterr( "###gapscore = %f\n", gapscore );
544 #endif
545 #if DEBUG
546 reporterr( "score in intergroup_score = %f\n", score );
547 #endif
548 // return( score );
549 }
intergroup_score_new(char ** seq1,char ** seq2,double * eff1,double * eff2,int clus1,int clus2,int len,double * value)550 void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
551 {
552 int i, j, k;
553 int len2 = len - 2;
554 int ms1, ms2;
555 double tmpscore;
556 char *mseq1, *mseq2;
557 static double efficient[1];
558
559 // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
560 // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
561
562 *value = 0.0;
563 for( i=0; i<clus1; i++ )
564 {
565 for( j=0; j<clus2; j++ )
566 {
567 *efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */
568 mseq1 = seq1[i];
569 mseq2 = seq2[j];
570 tmpscore = 0.0;
571 for( k=0; k<len; k++ )
572 {
573 ms1 = (int)mseq1[k];
574 ms2 = (int)mseq2[k];
575 if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
576 tmpscore += (double)amino_dis[ms1][ms2];
577
578 if( ms1 == (int)'-' )
579 {
580 tmpscore += (double)penalty;
581 tmpscore += (double)amino_dis[ms1][ms2];
582 while( (ms1=(int)mseq1[++k]) == (int)'-' )
583 tmpscore += (double)amino_dis[ms1][ms2];
584 k--;
585 if( k >len2 ) break;
586 continue;
587 }
588 if( ms2 == (int)'-' )
589 {
590 tmpscore += (double)penalty;
591 tmpscore += (double)amino_dis[ms1][ms2];
592 while( (ms2=(int)mseq2[++k]) == (int)'-' )
593 tmpscore += (double)amino_dis[ms1][ms2];
594 k--;
595 if( k > len2 ) break;
596 continue;
597 }
598 }
599 *value += (double)tmpscore * (double)*efficient;
600 }
601 }
602 #if 0
603 fprintf( stdout, "###score = %f\n", score );
604 #endif
605 #if DEBUG
606 reporterr( "score in intergroup_score = %f\n", score );
607 #endif
608 // return( score );
609 }
610
611
score_calc5(char ** seq,int s,double ** eff,int ex)612 double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
613 {
614 int i, j, k;
615 double c;
616 int len = strlen( seq[0] );
617 double score;
618 double tmpscore;
619 char *mseq1, *mseq2;
620 double efficient;
621 #if DEBUG
622 FILE *fp;
623 #endif
624
625 score = 0.0;
626 c = 0.0;
627
628 for( i=0; i<s; i++ )
629 {
630
631 if( i == ex ) continue;
632 efficient = eff[i][ex];
633 mseq1 = seq[i];
634 mseq2 = seq[ex];
635 tmpscore = 0.0;
636 for( k=0; k<len; k++ )
637 {
638 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
639 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
640
641 if( mseq1[k] == '-' )
642 {
643 tmpscore += penalty;
644 while( mseq1[++k] == '-' )
645 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
646 k--;
647 if( k > len-2 ) break;
648 continue;
649 }
650 if( mseq2[k] == '-' )
651 {
652 tmpscore += penalty;
653 while( mseq2[++k] == '-' )
654 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
655 k--;
656 if( k > len-2 ) break;
657 continue;
658 }
659 }
660 score += (double)tmpscore * efficient;
661 /*
662 fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient );
663 */
664 }
665 /*
666 fprintf( stdout, "total score = %f\n", score );
667 */
668
669 for( i=0; i<s-1; i++ )
670 {
671 for( j=i+1; j<s; j++ )
672 {
673 if( i == ex || j == ex ) continue;
674
675 efficient = eff[i][j];
676 mseq1 = seq[i];
677 mseq2 = seq[j];
678 tmpscore = 0.0;
679 for( k=0; k<len; k++ )
680 {
681 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
682 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
683
684 if( mseq1[k] == '-' )
685 {
686 tmpscore += penalty;
687 while( mseq1[++k] == '-' )
688 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
689 k--;
690 if( k > len-2 ) break;
691 continue;
692 }
693 if( mseq2[k] == '-' )
694 {
695 tmpscore += penalty;
696 while( mseq2[++k] == '-' )
697 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
698 k--;
699 if( k > len-2 ) break;
700 continue;
701 }
702 }
703 score += (double)tmpscore * efficient;
704 }
705 }
706 /*
707 reporterr( "score in score_calc5 = %f\n", score );
708 */
709 return( (double)score );
710 /*
711
712 fprintf( trap_g, "score by fast = %f\n", (double)score );
713
714 tmpscore = score = 0.0;
715 for( i=0; i<s; i++ )
716 {
717 if( i == ex ) continue;
718 tmpscore = Cscore_m_1( seq, i, eff );
719 fprintf( stdout, "%d %f\n", i, tmpscore );
720
721 score += tmpscore;
722 }
723 tmpscore = Cscore_m_1( seq, ex, eff );
724 fprintf( stdout, "ex%d %f\n", i, tmpscore );
725 score += tmpscore;
726
727 return( score );
728 */
729 }
730
731
732
score_calc4(char ** seq,int s,double ** eff,int ex)733 double score_calc4( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
734 {
735 int i, j, k;
736 double c;
737 int len = strlen( seq[0] );
738 double score;
739 long tmpscore;
740 char *mseq1, *mseq2;
741 double efficient;
742
743 score = 0.0;
744 c = 0.0;
745 /*
746 printf( "in score_calc4\n" );
747 for( i=0; i<s; i++ )
748 {
749 for( j=0; j<s; j++ )
750 {
751 printf( "% 5.3f", eff[i][j] );
752 }
753 printf( "\n" );
754
755 }
756 */
757 for( i=0; i<s-1; i++ )
758 {
759 for( j=i+1; j<s; j++ )
760 {
761 efficient = eff[i][j];
762 if( mix == 1 ) efficient = 1.0;
763 /*
764 printf( "weight for %d v.s. %d = %f\n", i, j, efficient );
765 */
766 mseq1 = seq[i];
767 mseq2 = seq[j];
768 tmpscore = 0;
769 for( k=0; k<len; k++ )
770 {
771 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
772 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ;
773
774 c += efficient;
775
776 if( mseq1[k] == '-' )
777 {
778 tmpscore += penalty - n_dis[24][0];
779 while( mseq1[++k] == '-' )
780 ;
781 k--;
782 if( k > len-2 ) break;
783 continue;
784 }
785 if( mseq2[k] == '-' )
786 {
787 tmpscore += penalty - n_dis[24][0];
788 while( mseq2[++k] == '-' )
789 ;
790 k--;
791 if( k > len-2 ) break;
792 continue;
793 }
794 }
795 /*
796 if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len );
797 */
798 score += (double)tmpscore * efficient;
799 }
800 }
801 score /= c;
802 return( (double)score );
803 }
804
805
806
upg2(int nseq,double ** eff,int *** topol,double ** len)807 void upg2( int nseq, double **eff, int ***topol, double **len )
808 {
809 int i, j, k;
810 double tmplen[M];
811
812 static char **pair = NULL;
813
814 if( !pair )
815 {
816 pair = AllocateCharMtx( njob, njob );
817 }
818
819 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
820 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
821 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
822
823 for( k=0; k<nseq-1; k++ )
824 {
825 double minscore = 9999.0;
826 int im = -1, jm = -1;
827 int count;
828
829 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
830 {
831 if( eff[i][j] < minscore )
832 {
833 minscore = eff[i][j];
834 im = i; jm = j;
835 }
836 }
837 for( i=0, count=0; i<nseq; i++ )
838 if( pair[im][i] > 0 )
839 {
840 topol[k][0][count] = i;
841 count++;
842 }
843 topol[k][0][count] = -1;
844 for( i=0, count=0; i<nseq; i++ )
845 if( pair[jm][i] > 0 )
846 {
847 topol[k][1][count] = i;
848 count++;
849 }
850 topol[k][1][count] = -1;
851
852 len[k][0] = minscore / 2.0 - tmplen[im];
853 len[k][1] = minscore / 2.0 - tmplen[jm];
854
855 tmplen[im] = minscore / 2.0;
856
857 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
858 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
859
860 for( i=0; i<nseq; i++ )
861 {
862 if( i != im && i != jm )
863 {
864 eff[MIN(i,im)][MAX(i,im)] =
865 ( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0;
866 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
867 }
868 eff[im][jm] = 9999.0;
869 }
870 #if DEBUG
871 printf( "STEP-%03d:\n", k+1 );
872 printf( "len0 = %f\n", len[k][0] );
873 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
874 printf( "\n" );
875 printf( "len1 = %f\n", len[k][1] );
876 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
877 printf( "\n" );
878 #endif
879 }
880 }
881
/*
 * setnearest - find the chain entry closest to `pos`.
 *
 * nseq          not used
 * acpt          array of chain nodes; entries are visited through ->next
 * eff           distances stored by offset: the distance between a and b
 *               with a < b is read as eff[a][b-a]
 * mindisfrompt  output; smallest distance found, or 999.9 if none is lower
 * nearestpt     output; position of that entry, or -1 if none was found
 * pos           position whose nearest neighbour is wanted
 *
 * Entries after `pos` are examined first, then the entries from the head of
 * the chain up to (not including) `pos`.  Only a strictly smaller distance
 * replaces the current best.
 */
static void setnearest( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos )
{
    Bchain *node;
    double best = 999.9;
    double d;
    int bestpos = -1;
    int other;

    for( node=acpt[pos].next; node!=NULL; node=node->next )
    {
        other = node->pos;
        d = eff[pos][other-pos];
        if( d < best )
        {
            best = d;
            bestpos = other;
        }
    }

    for( node=acpt; node && node->pos!=pos; node=node->next )
    {
        other = node->pos;
        d = eff[other][pos-other];
        if( d < best )
        {
            best = d;
            bestpos = other;
        }
    }

    *mindisfrompt = best;
    *nearestpt = bestpos;
}
923
/*
 * setnearest_double_fullmtx - find the chain entry closest to `pos` using
 * a distance matrix indexed by absolute positions.
 *
 * nseq          not used
 * acpt          array of chain nodes; entries are visited through ->next
 * eff           distances; entries after `pos` are read as eff[pos][j],
 *               entries before it as eff[j][pos]
 * mindisfrompt  output; smallest distance found, or 999.9 if none is lower
 * nearestpt     output; position of that entry, or -1 if none was found
 * pos           position whose nearest neighbour is wanted
 *
 * Both outputs are updated in place while the chain is walked.
 */
static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos )
{
    Bchain *node;
    double d;
    int other;

    *mindisfrompt = 999.9;
    *nearestpt = -1;

    /* entries that follow pos in the chain */
    for( node=acpt[pos].next; node!=NULL; node=node->next )
    {
        other = node->pos;
        d = eff[pos][other];
        if( d < *mindisfrompt )
        {
            *mindisfrompt = d;
            *nearestpt = other;
        }
    }

    /* entries from the head of the chain up to, not including, pos */
    for( node=acpt; node && node->pos!=pos; node=node->next )
    {
        other = node->pos;
        d = eff[other][pos];
        if( d < *mindisfrompt )
        {
            *mindisfrompt = d;
            *nearestpt = other;
        }
    }
}
960
961
962
/*
 * loadtreeoneline - read one merge step of a guide tree file.
 *
 * ar   output; the two node numbers of the line, converted from the file's
 *      1-based numbering to 0-based, with ar[0] < ar[1]
 * len  output; the two branch lengths of the line
 * fp   open tree file, positioned at the line to read
 *
 * A line must hold "<node> <node> <length> <length>".  On a read failure, a
 * malformed line or invalid node numbers, a message is written through
 * reporterr() and the process exits with status 1.
 *
 * Fix: node numbers below 1 are now rejected.  Previously they became
 * negative after the conversion to 0-based and were later used as array
 * indices by the caller, which only checks the upper bound.  The check is
 * made before the decrement so that the subtraction cannot overflow.  The
 * line buffer is now automatic instead of static.
 */
static void loadtreeoneline( int *ar, double *len, FILE *fp )
{
    char line[1000];
    int nfields;

    if( fgets( line, 999, fp ) == NULL )
    {
        reporterr( "\n\nFormat error (1) in the tree?  It has to be a bifurcated and rooted tree.\n" );
        reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
        exit( 1 );
    }

    nfields = sscanf( line, "%d %d %lf %lf", ar, ar+1, len, len+1 );
    if( nfields != 4 )
    {
        reporterr( "\n\nFormat error (2) in the tree?  It has to be a bifurcated and rooted tree.\n" );
        reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
        exit( 1 );
    }

    /* node numbers are 1-based in the file and must be strictly increasing */
    if( ar[0] < 1 || ar[0] >= ar[1] )
    {
        reporterr( "\n\nIncorrect guide tree\n" );
        reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
        exit( 1 );
    }

    ar[0]--;
    ar[1]--;
}
1000
loadtop(int nseq,double ** mtx,int *** topol,double ** len,char ** name,int * nlen,Treedep * dep)1001 void loadtop( int nseq, double **mtx, int ***topol, double **len, char **name, int *nlen, Treedep *dep )
1002 {
1003 int i, j, k, minijm, maxijm;
1004 int *intpt, *intpt2;
1005 int *hist = NULL;
1006 Bchain *ac = NULL;
1007 int im = -1, jm = -1;
1008 Bchain *acjmnext, *acjmprev;
1009 int prevnode;
1010 int *pt1, *pt2, *pt11, *pt22;
1011 int *nmemar;
1012 int nmemim, nmemjm;
1013 char **tree;
1014 char *treetmp;
1015 char *nametmp, *nameptr, *tmpptr;
1016 char namec;
1017 FILE *fp;
1018 int node[2];
1019 double *height;
1020 double clusterdist;
1021 int mpair, mi, mj;
1022
1023 fp = fopen( "_guidetree", "r" );
1024 if( !fp )
1025 {
1026 reporterr( "cannot open _guidetree\n" );
1027 exit( 1 );
1028 }
1029
1030 if( !hist )
1031 {
1032 hist = AllocateIntVec( nseq );
1033 ac = (Bchain *)malloc( nseq * sizeof( Bchain ) );
1034 nmemar = AllocateIntVec( nseq );
1035 // treetmp = AllocateCharVec( nseq*50 );
1036 treetmp = NULL;
1037 nametmp = AllocateCharVec( 1000 ); // nagasugi
1038 // tree = AllocateCharMtx( nseq, nseq*50 );
1039 tree = AllocateCharMtx( nseq, 0 );
1040 height = AllocateFloatVec( nseq );
1041 }
1042
1043 for( i=0; i<nseq; i++ )
1044 {
1045 for( j=0; j<999; j++ ) nametmp[j] = 0;
1046 for( j=0; j<999; j++ )
1047 {
1048 namec = name[i][j];
1049 if( namec == 0 )
1050 break;
1051 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
1052 nametmp[j] = namec;
1053 else
1054 nametmp[j] = '_';
1055 }
1056 nametmp[j] = 0;
1057 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
1058 if( outnumber )
1059 nameptr = strstr( nametmp, "_numo_e" ) + 8;
1060 else
1061 nameptr = nametmp + 1;
1062
1063 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
1064
1065 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
1066 if( tree[i] == NULL )
1067 {
1068 reporterr( "Cannot allocate tree!\n" );
1069 exit( 1 );
1070 }
1071 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
1072 }
1073
1074
1075 for( i=0; i<nseq; i++ )
1076 {
1077 ac[i].next = ac+i+1;
1078 ac[i].prev = ac+i-1;
1079 ac[i].pos = i;
1080 }
1081 ac[nseq-1].next = NULL;
1082
1083
1084 for( i=0; i<nseq; i++ )
1085 {
1086 hist[i] = -1;
1087 nmemar[i] = 1;
1088 }
1089
1090 reporterr( "\n" );
1091 for( k=0; k<nseq-1; k++ )
1092 {
1093 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
1094 #if 0
1095 minscore = 999.9;
1096 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
1097 {
1098 i = acpti->pos;
1099 // reporterr( "k=%d i=%d\n", k, i );
1100 if( mindisfrom[i] < minscore ) // muscle
1101 {
1102 im = i;
1103 minscore = mindisfrom[i];
1104 }
1105 }
1106 jm = nearest[im];
1107 if( jm < im )
1108 {
1109 j=jm; jm=im; im=j;
1110 }
1111 #else
1112 len[k][0] = len[k][1] = -1.0;
1113 loadtreeoneline( node, len[k], fp );
1114 im = node[0];
1115 jm = node[1];
1116
1117 if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL )
1118 {
1119 reporterr( "\n\nCheck the guide tree.\n" );
1120 reporterr( "im=%d, jm=%d\n", im+1, jm+1 );
1121 reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
1122 exit( 1 );
1123 }
1124
1125 #endif
1126
1127 prevnode = hist[im];
1128 if( dep ) dep[k].child0 = prevnode;
1129 nmemim = nmemar[im];
1130
1131 // reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
1132
1133 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
1134 if( prevnode == -1 )
1135 {
1136 *intpt++ = im;
1137 *intpt = -1;
1138 }
1139 else
1140 {
1141 pt1 = topol[prevnode][0];
1142 pt2 = topol[prevnode][1];
1143 if( *pt1 > *pt2 )
1144 {
1145 pt11 = pt2;
1146 pt22 = pt1;
1147 }
1148 else
1149 {
1150 pt11 = pt1;
1151 pt22 = pt2;
1152 }
1153 for( intpt2=pt11; *intpt2!=-1; )
1154 *intpt++ = *intpt2++;
1155 for( intpt2=pt22; *intpt2!=-1; )
1156 *intpt++ = *intpt2++;
1157 *intpt = -1;
1158 }
1159
1160
1161 nmemjm = nmemar[jm];
1162 prevnode = hist[jm];
1163 if( dep ) dep[k].child1 = prevnode;
1164
1165 // reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
1166
1167 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
1168 if( !intpt )
1169 {
1170 reporterr( "Cannot reallocate topol\n" );
1171 exit( 1 );
1172 }
1173 if( prevnode == -1 )
1174 {
1175 *intpt++ = jm;
1176 *intpt = -1;
1177 }
1178 else
1179 {
1180 pt1 = topol[prevnode][0];
1181 pt2 = topol[prevnode][1];
1182 if( *pt1 > *pt2 )
1183 {
1184 pt11 = pt2;
1185 pt22 = pt1;
1186 }
1187 else
1188 {
1189 pt11 = pt1;
1190 pt22 = pt2;
1191 }
1192 for( intpt2=pt11; *intpt2!=-1; )
1193 *intpt++ = *intpt2++;
1194 for( intpt2=pt22; *intpt2!=-1; )
1195 *intpt++ = *intpt2++;
1196 *intpt = -1;
1197 }
1198
1199
1200 // len[k][0] = ( minscore - tmptmplen[im] );
1201 // len[k][1] = ( minscore - tmptmplen[jm] );
1202 // len[k][0] = -1;
1203 // len[k][1] = -1;
1204
1205
1206 hist[im] = k;
1207 nmemar[im] = nmemim + nmemjm;
1208
1209
1210 if( len[k][0] == -1 || len[k][1] == -1 )
1211 {
1212 reporterr( "Re-computing the length of branch %d..\n", k );
1213 clusterdist = 0.0;
1214 mpair = 0;
1215 for( i=0; (mi=topol[k][0][i])>-1; i++ ) for( j=0; (mj=topol[k][1][j])>-1; j++ )
1216 {
1217 minijm = MIN(mi,mj);
1218 maxijm = MAX(mi,mj);
1219 clusterdist += mtx[minijm][maxijm-minijm];
1220 mpair += 1;
1221 }
1222 clusterdist /= (double)mpair;
1223 reporterr( "clusterdist = %f\n", clusterdist );
1224 if( len[k][0] == -1 ) len[k][0] = clusterdist/2.0 - height[im];
1225 if( len[k][1] == -1 ) len[k][1] = clusterdist/2.0 - height[im];
1226
1227 fprintf( stderr, "len0 = %f\n", len[k][0] );
1228 fprintf( stderr, "len1 = %f\n\n", len[k][1] );
1229 }
1230
1231 #if 0
1232 fprintf( stderr, "vSTEP-%03d:\n", k+1 );
1233 fprintf( stderr, "len0 = %f\n", len[k][0] );
1234 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 );
1235 fprintf( stderr, "\n" );
1236 fprintf( stderr, "len1 = %f\n", len[k][1] );
1237 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 );
1238 fprintf( stderr, "\n" );
1239
1240 #endif
1241 height[im] += len[k][0]; // for ig tree, 2015/Dec/25
1242 dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25
1243 // reporterr( "##### dep[%d].distfromtip = %f\n", k, height[im] );
1244
1245
1246
1247 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
1248 if( !treetmp )
1249 {
1250 reporterr( "Cannot allocate treetmp\n" );
1251 exit( 1 );
1252 }
1253 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
1254 free( tree[im] );
1255 free( tree[jm] );
1256 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
1257 tree[jm] = NULL;
1258 if( tree[im] == NULL )
1259 {
1260 reporterr( "Cannot reallocate tree!\n" );
1261 exit( 1 );
1262 }
1263 strcpy( tree[im], treetmp );
1264
1265 // reporterr( "im,jm=%d,%d\n", im, jm );
1266 acjmprev = ac[jm].prev;
1267 acjmnext = ac[jm].next;
1268 acjmprev->next = acjmnext;
1269 if( acjmnext != NULL )
1270 acjmnext->prev = acjmprev;
1271 // free( (void *)eff[jm] ); eff[jm] = NULL;
1272
1273 #if 0 // muscle seems to miss this.
1274 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
1275 {
1276 i = acpti->pos;
1277 if( nearest[i] == im )
1278 {
1279 // reporterr( "calling setnearest\n" );
1280 // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
1281 }
1282 }
1283 #endif
1284
1285
1286 }
1287 fclose( fp );
1288 fp = fopen( "infile.tree", "w" );
1289 fprintf( fp, "%s\n", treetmp );
1290 fprintf( fp, "#by loadtop\n" );
1291 fclose( fp );
1292
1293 FreeCharMtx( tree );
1294 free( treetmp );
1295 free( nametmp );
1296 free( hist );
1297 free( (char *)ac );
1298 free( (void *)nmemar );
1299 free( height );
1300
1301 }
1302
/*
 * Fills dst with the members of one side of a merge step, terminated by -1.
 * prevnode is the earlier step that built that side, or -1 when the side is
 * the single sequence `leaf`.  The two member lists of the earlier step are
 * copied so that the list whose first element is smaller comes first.
 * dst must have room for all members plus the terminator.
 */
static void loadtree_setmembers( int *dst, int ***topol, int prevnode, int leaf )
{
	int *first, *second, *src;

	if( prevnode == -1 )
	{
		*dst++ = leaf;
		*dst = -1;
		return;
	}

	first = topol[prevnode][0];
	second = topol[prevnode][1];
	if( *first > *second )
	{
		src = first;
		first = second;
		second = src;
	}
	for( src=first; *src!=-1; ) *dst++ = *src++;
	for( src=second; *src!=-1; ) *dst++ = *src++;
	*dst = -1;
}

/*
 * loadtree: rebuild a guide tree from the text file "_guidetree".
 *
 * One merge step is obtained per call to loadtreeoneline(), which is
 * expected to fill node[0], node[1] (the two clusters to merge) and the two
 * branch lengths len[k][0], len[k][1].
 *
 * Outputs for step k (k = 0 .. nseq-2):
 *   topol[k][0], topol[k][1]  member lists of the two merged clusters,
 *                             each terminated by -1 (reallocated here)
 *   len[k][0], len[k][1]      branch lengths; negative values become 0.0
 *   dep[k] (only if dep != NULL)
 *                             child0/child1 = earlier step that built each
 *                             side (-1 for a single sequence),
 *                             distfromtip   = accumulated height of side 0
 * The final parenthesized tree string is written to "infile.tree".
 *
 * nseq : number of sequences
 * name : sequence names, sanitized and used as leaf labels
 * nlen : not used
 *
 * Every failure (unreadable file, invalid node index, missing branch length,
 * allocation failure) is reported through reporterr() and ends the program
 * with exit( 1 ).
 */
void loadtree( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep )
{
	int i, j, k;
	int *hist;                 // hist[i]: last step that merged into cluster i, -1 if none
	int im = -1, jm = -1;
	int prevnode;
	int *nmemar;               // nmemar[i]: number of members of cluster i
	int nmemim, nmemjm;
	char **tree;               // tree[i]: tree string of cluster i, NULL once merged away
	char *treetmp = NULL;
	char *nametmp, *nameptr, *tmpptr;
	char namec;
	FILE *fp;
	int node[2];
	double *height = NULL;     // only allocated and used when dep != NULL
	size_t treesize;
	int written;

	fp = fopen( "_guidetree", "r" );
	if( !fp )
	{
		reporterr( "cannot open _guidetree\n" );
		exit( 1 );
	}

	hist = AllocateIntVec( nseq );
	nmemar = AllocateIntVec( nseq );
	nametmp = AllocateCharVec( 1000 ); // longer than necessary
	tree = AllocateCharMtx( nseq, 0 );
	// height[] is accumulated with += below, so AllocateFloatVec is assumed
	// to return zeroed memory -- TODO confirm.
	if( dep ) height = AllocateFloatVec( nseq );

	// Build one leaf label per sequence.
	for( i=0; i<nseq; i++ )
	{
		for( j=0; j<999; j++ ) nametmp[j] = 0;
		for( j=0; j<999; j++ )
		{
			namec = name[i][j];
			if( namec == 0 )
				break;
			// The cast keeps isalnum() defined for bytes >= 0x80 where char is signed.
			else if( isalnum( (unsigned char)namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
				nametmp[j] = namec;
			else
				nametmp[j] = '_';
		}
		nametmp[j] = 0;

		nameptr = nametmp + 1;
		if( outnumber )
		{
			// Skip past the "_numo_e" marker; fall back to the plain name
			// instead of dereferencing NULL+8 when the marker is missing.
			tmpptr = strstr( nametmp, "_numo_e" );
			if( tmpptr ) nameptr = tmpptr + 8;
		}

		if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because '=' was turned into '_'

		tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 for the number
		if( tree[i] == NULL )
		{
			reporterr( "Cannot allocate tree!\n" );
			exit( 1 );
		}
		sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
	}

	for( i=0; i<nseq; i++ )
	{
		hist[i] = -1;
		nmemar[i] = 1;
	}

	reporterr( "\n" );
	for( k=0; k<nseq-1; k++ )
	{
		if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );

		// -1.0 marks "not read yet"; it is checked after the line is parsed.
		len[k][0] = len[k][1] = -1.0;
		loadtreeoneline( node, len[k], fp );
		im = node[0];
		jm = node[1];

		// Negative indices would read outside tree[]; im == jm would free
		// the same label twice below.
		if( im < 0 || jm < 0 || im == jm || im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL )
		{
			reporterr( "\n\nCheck the guide tree.\n" );
			reporterr( "im=%d, jm=%d\n", im+1, jm+1 );
			reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
			exit( 1 );
		}

		if( len[k][0] == -1.0 || len[k][1] == -1.0 )
		{
			reporterr( "\n\nERROR: Branch length is not given.\n" );
			exit( 1 );
		}

		if( len[k][0] < 0.0 ) len[k][0] = 0.0;
		if( len[k][1] < 0.0 ) len[k][1] = 0.0;

		// Side 0: members of cluster im.
		prevnode = hist[im];
		if( dep ) dep[k].child0 = prevnode;
		nmemim = nmemar[im];
		topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
		if( !topol[k][0] )
		{
			reporterr( "Cannot reallocate topol\n" );
			exit( 1 );
		}
		loadtree_setmembers( topol[k][0], topol, prevnode, im );

		// Side 1: members of cluster jm.
		nmemjm = nmemar[jm];
		prevnode = hist[jm];
		if( dep ) dep[k].child1 = prevnode;
		topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
		if( !topol[k][1] )
		{
			reporterr( "Cannot reallocate topol\n" );
			exit( 1 );
		}
		loadtree_setmembers( topol[k][1], topol, prevnode, jm );

		// The merged cluster lives on under index im.
		hist[im] = k;
		nmemar[im] = nmemim + nmemjm;

		// Branch lengths come from the input file, so the formatted size is
		// bounded with snprintf rather than trusted to fit into "+100".
		treesize = strlen( tree[im] ) + strlen( tree[jm] ) + 100;
		treetmp = realloc( treetmp, treesize );
		if( !treetmp )
		{
			reporterr( "Cannot allocate treetmp\n" );
			exit( 1 );
		}
		written = snprintf( treetmp, treesize, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
		if( written < 0 || (size_t)written >= treesize )
		{
			reporterr( "\n\nERROR: Branch length is too large.\n" );
			exit( 1 );
		}
		free( tree[im] );
		free( tree[jm] );
		tree[im] = calloc( (size_t)written + 1, sizeof( char ) );
		tree[jm] = NULL;
		if( tree[im] == NULL )
		{
			reporterr( "Cannot reallocate tree!\n" );
			exit( 1 );
		}
		strcpy( tree[im], treetmp );

		if( dep )
		{
			height[im] += len[k][0]; // for ig tree, 2015/Dec/25
			dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25
		}
	}
	fclose( fp );

	fp = fopen( "infile.tree", "w" );
	if( !fp )
	{
		reporterr( "cannot open infile.tree\n" );
		exit( 1 );
	}
	fprintf( fp, "%s\n", treetmp );
	fprintf( fp, "#by loadtree\n" );
	fclose( fp );

	// Free every remaining label explicitly; the surviving root string is
	// not necessarily stored at index 0.
	for( i=0; i<nseq; i++ ) free( tree[i] );
	free( tree );
	free( treetmp );
	free( nametmp );
	free( hist );
	free( nmemar );
	free( height );
}
1623
// Weights read by cluster_mix_double(): sueff1 multiplies the smaller of the
// two distances and sueff05 multiplies their sum.  The tree-building
// functions in this file assign them from sueff_global before clustering
// (sueff1 = 1 - sueff_global, sueff05 = sueff_global * 0.5).
static double sueff1, sueff05;
//static double sueff1_double, sueff05_double;
1626
cluster_mix_double(double d1,double d2)1627 static double cluster_mix_double( double d1, double d2 )
1628 {
1629 return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 );
1630 }
// Distance of a merged cluster as the arithmetic mean of the two inputs.
static double cluster_average_double( double d1, double d2 )
{
	double total = d1 + d2;

	return( total * 0.5 );
}
// Distance of a merged cluster as the smaller of the two inputs.
static double cluster_minimum_double( double d1, double d2 )
{
	double smaller = MIN( d1, d2 );

	return( smaller );
}
// Disabled copies of the three cluster functions defined above.  The only
// difference is that this mix variant reads sueff1_double / sueff05_double,
// whose declaration is commented out as well.
#if 0
static double cluster_mix_double( double d1, double d2 )
{
	return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double );
}
static double cluster_average_double( double d1, double d2 )
{
	return( ( d1 + d2 ) * 0.5 );
}
static double cluster_minimum_double( double d1, double d2 )
{
	return( MIN( d1, d2 ) );
}
#endif
1653
/*
 * For every group that has at least two members, raise the distance between
 * each member and each non-member to at least the largest distance found
 * between two members of that group.
 *
 * eff    : half distance matrix, modified in place; the distance between
 *          sequences a < b is stored at eff[a][b-a]
 * ngroup : number of entries in groups
 * groups : groups[i] is a list of sequence indices terminated by -1
 * nseq   : number of sequences
 *
 * A warning is printed (at most 100 times) when a raised distance was below
 * 0.5.  Exits through reporterr()/exit( 1 ) when the work arrays cannot be
 * allocated.
 */
static void increaseintergroupdistanceshalfmtx( double **eff, int ngroup, int **groups, int nseq )
{
	int nwarned = 0;
	int i, k, m, s1, s2, sl, ss;
	int *others, *tft;
	double maxdist, *dptr, dtmp;

	// Both arrays hold ints; size them by the element type, not by a pointer type.
	tft = calloc( nseq, sizeof *tft );        // tft[s] == 1 if s belongs to the current group
	others = calloc( nseq, sizeof *others );  // non-members of the current group, terminated by -1
	if( !tft || !others )
	{
		reporterr( "Cannot allocate tft or others\n" );
		exit( 1 );
	}

	reporterr( "\n" ); // This is necessary.
	for( i=0; i<ngroup; i++ )
	{
		if( groups[i][1] == -1 ) continue; // fewer than two members

		for( m=0; m<nseq; m++ ) tft[m] = 0;
		for( m=0; (s1=groups[i][m])>-1; m++ ) tft[s1] = 1;
		for( m=0,k=0; m<nseq; m++ ) if( tft[m] == 0 ) others[k++] = m;
		others[k] = -1;

		// Largest distance between any two members of the group.
		maxdist = 0.0;
		for( m=1; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k<m; k++ )
		{
			if( s2 > s1 )
			{
				sl = s2; ss = s1;
			}
			else
			{
				sl = s1; ss = s2;
			}
			dtmp = eff[ss][sl-ss];
			if( dtmp > maxdist ) maxdist = dtmp;
		}

		// No member may be closer to an outsider than maxdist.
		for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ )
		{
			if( s2 > s1 )
			{
				sl = s2; ss = s1;
			}
			else
			{
				sl = s1; ss = s2;
			}
			dptr = eff[ss] + sl-ss;
			if( *dptr < maxdist )
			{
				if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 );
				*dptr = maxdist;
			}
		}
	}
	if( nwarned > 100 ) reporterr( "# Sequenc.... (more pairs)\n" );

	free( tft );
	free( others );
}
1719
/*
 * For every group that has at least two members, raise the distance between
 * each member and each non-member to at least the largest distance found
 * between two members of that group.
 *
 * eff    : distance matrix, modified in place; the distance between
 *          sequences a < b is read and written at eff[a][b]
 * ngroup : number of entries in groups
 * groups : groups[i] is a list of sequence indices terminated by -1
 * nseq   : number of sequences
 *
 * A warning is printed (at most 100 times) when a raised distance was below
 * 0.5.  Exits through reporterr()/exit( 1 ) when the work arrays cannot be
 * allocated.
 */
static void increaseintergroupdistancesfullmtx( double **eff, int ngroup, int **groups, int nseq )
{
	int nwarned = 0;
	int i, k, m, s1, s2, sl, ss;
	int *others, *tft;
	double maxdist, *dptr, dtmp;

	// Both arrays hold ints; size them by the element type, not by a pointer type.
	tft = calloc( nseq, sizeof *tft );        // tft[s] == 1 if s belongs to the current group
	others = calloc( nseq, sizeof *others );  // non-members of the current group, terminated by -1
	if( !tft || !others )
	{
		reporterr( "Cannot allocate tft or others\n" );
		exit( 1 );
	}

	reporterr( "\n" ); // This is necessary.
	for( i=0; i<ngroup; i++ )
	{
		if( groups[i][1] == -1 ) continue; // fewer than two members

		for( m=0; m<nseq; m++ ) tft[m] = 0;
		for( m=0; (s1=groups[i][m])>-1; m++ ) tft[s1] = 1;
		for( m=0,k=0; m<nseq; m++ ) if( tft[m] == 0 ) others[k++] = m;
		others[k] = -1;

		// Largest distance between any two members of the group.
		maxdist = 0.0;
		for( m=1; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k<m; k++ )
		{
			if( s2 > s1 )
			{
				sl = s2; ss = s1;
			}
			else
			{
				sl = s1; ss = s2;
			}
			dtmp = eff[ss][sl];
			if( dtmp > maxdist ) maxdist = dtmp;
		}

		// No member may be closer to an outsider than maxdist.
		for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ )
		{
			if( s2 > s1 )
			{
				sl = s2; ss = s1;
			}
			else
			{
				sl = s1; ss = s2;
			}
			dptr = eff[ss] + sl;
			if( *dptr < maxdist )
			{
				if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 );
				*dptr = maxdist;
			}
		}
	}
	if( nwarned > 100 ) reporterr( "# Sequenc.... (more pairs)\n" );

	free( tft );
	free( others );
}
1781
/*
 * Fills dst with the members of one side of a merge step, terminated by -1.
 * prevnode is the earlier step that built that side, or -1 when the side is
 * the single sequence `leaf`.  The two member lists of the earlier step are
 * copied so that the list whose first element is smaller comes first.
 * dst must have room for all members plus the terminator.
 */
static void constrained_setmembers( int *dst, int ***topol, int prevnode, int leaf )
{
	int *first, *second, *src;

	if( prevnode == -1 )
	{
		*dst++ = leaf;
		*dst = -1;
		return;
	}

	first = topol[prevnode][0];
	second = topol[prevnode][1];
	if( *first > *second )
	{
		src = first;
		first = second;
		second = src;
	}
	for( src=first; *src!=-1; ) *dst++ = *src++;
	for( src=second; *src!=-1; ) *dst++ = *src++;
	*dst = -1;
}

/*
 * Appends the members of one cluster to dst in stored order (side 0, then
 * side 1 of step prevnode, or just `leaf` when prevnode is -1) without
 * writing a terminator.  Returns the position after the last element written.
 */
static int *constrained_appendmembers( int *dst, int ***topol, int prevnode, int leaf )
{
	int *src;

	if( prevnode == -1 )
	{
		*dst++ = leaf;
		return( dst );
	}
	for( src=topol[prevnode][0]; *src!=-1; ) *dst++ = *src++;
	for( src=topol[prevnode][1]; *src!=-1; ) *dst++ = *src++;
	return( dst );
}

/*
 * Builds a guide tree by repeatedly merging the closest pair of clusters,
 * while refusing any merge that would split one of the given groups.
 *
 * nseq    : number of sequences
 * eff     : half distance matrix (distance between a < b at eff[a][b-a]);
 *           modified in place, first by increaseintergroupdistanceshalfmtx()
 *           and then by every merge
 * topol   : output; topol[k][0] and topol[k][1] receive the -1 terminated
 *           member lists of the two clusters merged at step k
 * len     : output; len[k][0], len[k][1] are the branch lengths of step k,
 *           clamped at 0.0
 * name    : sequence names, sanitized and used as leaf labels
 * nlen    : not used
 * dep     : optional (may be NULL); receives child0, child1 and distfromtip
 *           for every step
 * ngroup  : number of entries in groups
 * groups  : groups[i] is a -1 terminated list of sequence indices that must
 *           end up as one cluster
 * efffree : when non-zero, eff[jm] is freed as soon as cluster jm is merged
 *
 * Reads the globals sueff_global, treemethod ('X', 'E' or 'q'), njob and
 * outnumber.  The final tree string is written to "infile.tree".  Errors are
 * reported through reporterr() and end the program with exit( 1 ).
 */
void fixed_supg_double_realloc_nobk_halfmtx_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int ngroup, int **groups, int efffree )
{
	int i, j, k, miniim, maxiim, minijm, maxijm;
	int *intpt;
	double tmpdouble;
	double eff1, eff0;
	double *tmptmplen;         // tmptmplen[i]: height already reached by cluster i
	int *hist;                 // hist[i]: last step that merged into cluster i, -1 if none
	Bchain *ac;                // linked list of the clusters that are still active
	int im = -1, jm = -1;
	Bchain *acjmnext, *acjmprev;
	int prevnode;
	Bchain *acpti, *acptj;
	int *nmemar;               // nmemar[i]: number of members of cluster i
	int nmemim, nmemjm;
	double minscore;
	int *nearest;              // nearest[i]: closest cluster to i
	double *mindisfrom;        // mindisfrom[i]: distance from i to nearest[i]
	char **tree;               // tree[i]: tree string of cluster i, NULL once merged away
	char *treetmp = NULL;
	char *nametmp, *nameptr, *tmpptr;
	FILE *fp;
	double (*clusterfunc)(double,double);
	char namec;
	int *testtopol, **inconsistent;
	int **inconsistentpairlist;
	int ninconsistentpairs;
	int *warned;
	int allinconsistent;
	int firsttime;

	increaseintergroupdistanceshalfmtx( eff, ngroup, groups, nseq );

	sueff1 = 1 - (double)sueff_global;
	sueff05 = (double)sueff_global * 0.5;
	if ( treemethod == 'X' )
		clusterfunc = cluster_mix_double;
	else if ( treemethod == 'E' )
		clusterfunc = cluster_average_double;
	else if ( treemethod == 'q' )
		clusterfunc = cluster_minimum_double;
	else
	{
		reporterr( "Unknown treemethod, %c\n", treemethod );
		exit( 1 );
	}

	hist = AllocateIntVec( njob );
	tmptmplen = AllocateFloatVec( njob );
	ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
	if( !ac )
	{
		reporterr( "Cannot allocate ac\n" );
		exit( 1 );
	}
	nmemar = AllocateIntVec( njob );
	mindisfrom = AllocateFloatVec( njob );
	nearest = AllocateIntVec( njob );
	nametmp = AllocateCharVec( 1000 ); // longer than necessary
	tree = AllocateCharMtx( njob, 0 );
	testtopol = AllocateIntVec( njob + 1 );
	// inconsistent[][] and warned[] are read before being written, so the
	// Allocate* helpers are assumed to return zeroed memory -- TODO confirm.
	inconsistent = AllocateIntMtx( njob, njob ); // wasteful
	inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // wasteful
	warned = AllocateIntVec( ngroup );

	// Build one leaf label per sequence.
	for( i=0; i<nseq; i++ )
	{
		for( j=0; j<999; j++ ) nametmp[j] = 0;
		for( j=0; j<999; j++ )
		{
			namec = name[i][j];
			if( namec == 0 )
				break;
			// The cast keeps isalnum() defined for bytes >= 0x80 where char is signed.
			else if( isalnum( (unsigned char)namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
				nametmp[j] = namec;
			else
				nametmp[j] = '_';
		}
		nametmp[j] = 0;

		nameptr = nametmp + 1;
		if( outnumber )
		{
			// Skip past the "_numo_e" marker; fall back to the plain name
			// instead of dereferencing NULL+8 when the marker is missing.
			tmpptr = strstr( nametmp, "_numo_e" );
			if( tmpptr ) nameptr = tmpptr + 8;
		}

		if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because '=' was turned into '_'

		tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 for the number
		if( tree[i] == NULL )
		{
			reporterr( "Cannot allocate tree!\n" );
			exit( 1 );
		}
		sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
	}

	// Every sequence starts as its own active cluster.
	for( i=0; i<nseq; i++ )
	{
		ac[i].next = ac+i+1;
		ac[i].prev = ac+i-1;
		ac[i].pos = i;
	}
	ac[nseq-1].next = NULL;

	for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle

	for( i=0; i<nseq; i++ )
	{
		tmptmplen[i] = 0.0;
		hist[i] = -1;
		nmemar[i] = 1;
	}

	reporterr( "\n" );
	ninconsistentpairs = 0;
	for( k=0; k<nseq-1; k++ )
	{
		if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );

		// Forget the pairs rejected in the previous step; clearing only the
		// listed pairs avoids wiping the whole matrix.
		for( i=0; i<ninconsistentpairs; i++ ) inconsistent[inconsistentpairlist[i][0]][inconsistentpairlist[i][1]] = 0;
		ninconsistentpairs = 0;

		// Pick a pair (im < jm) whose merge keeps every group intact.
		firsttime = 1;
		while( 1 )
		{
			minscore = 999.9;
			if( firsttime )
			{
				// Fast path: use the cached nearest neighbours.
				firsttime = 0;
				for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
				{
					i = acpti->pos;
					if( mindisfrom[i] < minscore ) // muscle
					{
						im = i;
						minscore = mindisfrom[i];
					}
				}
				jm = nearest[im];
				if( jm < im )
				{
					j=jm; jm=im; im=j;
				}
			}
			else
			{
				// The cached choice was rejected: scan all pairs that are not
				// marked inconsistent.  Each unordered pair of active
				// clusters is visited exactly once here, so no second pass
				// over the predecessors of i is required.
				for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
				{
					i = acpti->pos;
					for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
					{
						j = acptj->pos;
						if( !inconsistent[i][j] && (tmpdouble=eff[i][j-i]) < minscore )
						{
							minscore = tmpdouble;
							im = i; jm = j;
						}
					}
				}
			}

			// Give up when every remaining pair has been rejected.
			allinconsistent = 1;
			for( acpti=ac; allinconsistent && acpti->next!=NULL; acpti=acpti->next )
			{
				for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
				{
					if( inconsistent[acpti->pos][acptj->pos] == 0 )
					{
						allinconsistent = 0;
						break;
					}
				}
			}
			if( allinconsistent )
			{
				reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" );
				exit( 1 );
			}

			// Members of the cluster that merging im and jm would create.
			intpt = constrained_appendmembers( testtopol, topol, hist[im], im );
			intpt = constrained_appendmembers( intpt, topol, hist[jm], jm );
			*intpt = -1;

			// The candidate is rejected when it overlaps a group while
			// neither of the two contains the other.
			for( i=0; i<ngroup; i++ )
			{
				if( overlapmember( groups[i], testtopol ) && !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) )
				{
					if( !warned[i] )
					{
						warned[i] = 1;
						reporterr( "\n###################################################################\n" );
						reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 );
						reporterr( "###################################################################\n" );
					}
					inconsistent[im][jm] = 1;
					inconsistentpairlist[ninconsistentpairs][0] = im;
					inconsistentpairlist[ninconsistentpairs][1] = jm;
					ninconsistentpairs++;
					break;
				}
			}
			if( i == ngroup ) break; // no group objected
		}

		// Side 0: members of cluster im.
		prevnode = hist[im];
		if( dep ) dep[k].child0 = prevnode;
		nmemim = nmemar[im];
		topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
		if( !topol[k][0] )
		{
			reporterr( "Cannot reallocate topol\n" );
			exit( 1 );
		}
		constrained_setmembers( topol[k][0], topol, prevnode, im );

		// Side 1: members of cluster jm.
		prevnode = hist[jm];
		if( dep ) dep[k].child1 = prevnode;
		nmemjm = nmemar[jm];
		topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
		if( !topol[k][1] )
		{
			reporterr( "Cannot reallocate topol\n" );
			exit( 1 );
		}
		constrained_setmembers( topol[k][1], topol, prevnode, jm );

		// The new node sits at half the merge distance; each branch spans
		// the gap between that height and the height of its child.
		minscore *= 0.5;

		len[k][0] = ( minscore - tmptmplen[im] );
		len[k][1] = ( minscore - tmptmplen[jm] );
		if( len[k][0] < 0.0 ) len[k][0] = 0.0;
		if( len[k][1] < 0.0 ) len[k][1] = 0.0;

		if( dep ) dep[k].distfromtip = minscore;

		tmptmplen[im] = minscore;

		// The merged cluster lives on under index im.
		hist[im] = k;
		nmemar[im] = nmemim + nmemjm;

		// Recompute the distance from the merged cluster to every other
		// active cluster and refresh the nearest-neighbour cache.
		mindisfrom[im] = 999.9;
		eff[im][jm-im] = 999.9;
		for( acpti=ac; acpti!=NULL; acpti=acpti->next )
		{
			i = acpti->pos;
			if( i == im || i == jm ) continue;

			if( i < im )
			{
				miniim = i;
				maxiim = im;
				minijm = i;
				maxijm = jm;
			}
			else if( i < jm )
			{
				miniim = im;
				maxiim = i;
				minijm = i;
				maxijm = jm;
			}
			else
			{
				miniim = im;
				maxiim = i;
				minijm = jm;
				maxijm = i;
			}
			eff0 = eff[miniim][maxiim-miniim];
			eff1 = eff[minijm][maxijm-minijm];
			tmpdouble = eff[miniim][maxiim-miniim] = clusterfunc( eff0, eff1 );

			if( tmpdouble < mindisfrom[i] )
			{
				mindisfrom[i] = tmpdouble;
				nearest[i] = im;
			}
			if( tmpdouble < mindisfrom[im] )
			{
				mindisfrom[im] = tmpdouble;
				nearest[im] = i;
			}
			if( nearest[i] == jm )
			{
				nearest[i] = im;
			}
		}

		treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7), but %7 may be negative
		if( !treetmp )
		{
			reporterr( "Cannot allocate treetmp\n" );
			exit( 1 );
		}
		sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
		free( tree[im] );
		free( tree[jm] );
		tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
		tree[jm] = NULL;
		if( tree[im] == NULL )
		{
			reporterr( "Cannot reallocate tree!\n" );
			exit( 1 );
		}
		strcpy( tree[im], treetmp );

		// Remove jm from the list of active clusters.
		acjmprev = ac[jm].prev;
		acjmnext = ac[jm].next;
		acjmprev->next = acjmnext;
		if( acjmnext != NULL )
			acjmnext->prev = acjmprev;
		if( efffree )
		{
			free( (void *)eff[jm] ); eff[jm] = NULL;
		}

		// muscle seems to miss this: a cluster whose cached nearest
		// neighbour is im may now be farther from it than cached, so its
		// nearest neighbour has to be searched again.
		for( acpti=ac; acpti!=NULL; acpti=acpti->next )
		{
			i = acpti->pos;
			if( nearest[i] == im )
			{
				if( i < im )
				{
					miniim = i;
					maxiim = im;
				}
				else
				{
					miniim = im;
					maxiim = i;
				}
				if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
					setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
			}
		}
	}

	fp = fopen( "infile.tree", "w" );
	if( !fp )
	{
		reporterr( "cannot open infile.tree\n" );
		exit( 1 );
	}
	fprintf( fp, "%s\n", treetmp );
	fclose( fp );

	free( tree[0] );
	free( tree );
	free( treetmp );
	free( nametmp );
	free( tmptmplen );
	free( hist );
	free( ac );
	free( nmemar );
	free( mindisfrom );
	free( nearest );
	free( testtopol );
	FreeIntMtx( inconsistent );
	FreeIntMtx( inconsistentpairlist );
	free( warned );
}
2257
fixed_musclesupg_double_realloc_nobk_halfmtx_treeout(int nseq,double ** eff,int *** topol,double ** len,char ** name,int * nlen,Treedep * dep,int efffree)2258 void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int efffree )
2259 {
2260 int i, j, k, miniim, maxiim, minijm, maxijm;
2261 int *intpt, *intpt2;
2262 double tmpdouble;
2263 double eff1, eff0;
2264 double *tmptmplen = NULL; //static?
2265 int *hist = NULL; //static?
2266 Bchain *ac = NULL; //static?
2267 int im = -1, jm = -1;
2268 Bchain *acjmnext, *acjmprev;
2269 int prevnode;
2270 Bchain *acpti;
2271 int *pt1, *pt2, *pt11, *pt22;
2272 int *nmemar; //static?
2273 int nmemim, nmemjm;
2274 double minscore;
2275 int *nearest = NULL; // by D.Mathog, a guess
2276 double *mindisfrom = NULL; // by D.Mathog, a guess
2277 char **tree; //static?
2278 char *treetmp; //static?
2279 char *nametmp, *nameptr, *tmpptr; //static?
2280 FILE *fp;
2281 double (*clusterfuncpt[1])(double,double);
2282 char namec;
2283
2284
2285 sueff1 = 1 - (double)sueff_global;
2286 sueff05 = (double)sueff_global * 0.5;
2287 if ( treemethod == 'X' )
2288 clusterfuncpt[0] = cluster_mix_double;
2289 else if ( treemethod == 'E' )
2290 clusterfuncpt[0] = cluster_average_double;
2291 else if ( treemethod == 'q' )
2292 clusterfuncpt[0] = cluster_minimum_double;
2293 else
2294 {
2295 reporterr( "Unknown treemethod, %c\n", treemethod );
2296 exit( 1 );
2297 }
2298
2299 if( !hist )
2300 {
2301 hist = AllocateIntVec( njob );
2302 tmptmplen = AllocateFloatVec( njob );
2303 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
2304 nmemar = AllocateIntVec( njob );
2305 mindisfrom = AllocateFloatVec( njob );
2306 nearest = AllocateIntVec( njob );
2307 // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi?
2308 treetmp = NULL; // kentou 2013/06/12
2309 nametmp = AllocateCharVec( 1000 ); // nagasugi
2310 // tree = AllocateCharMtx( njob, njob*600 );
2311 tree = AllocateCharMtx( njob, 0 );
2312 }
2313
2314
2315 for( i=0; i<nseq; i++ )
2316 {
2317 for( j=0; j<999; j++ ) nametmp[j] = 0;
2318 for( j=0; j<999; j++ )
2319 {
2320 namec = name[i][j];
2321 if( namec == 0 )
2322 break;
2323 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
2324 nametmp[j] = namec;
2325 else
2326 nametmp[j] = '_';
2327 }
2328 nametmp[j] = 0;
2329 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
2330 if( outnumber )
2331 nameptr = strstr( nametmp, "_numo_e" ) + 8;
2332 else
2333 nameptr = nametmp + 1;
2334
2335 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
2336
2337 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
2338 if( tree[i] == NULL )
2339 {
2340 reporterr( "Cannot allocate tree!\n" );
2341 exit( 1 );
2342 }
2343 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
2344 }
2345 for( i=0; i<nseq; i++ )
2346 {
2347 ac[i].next = ac+i+1;
2348 ac[i].prev = ac+i-1;
2349 ac[i].pos = i;
2350 }
2351 ac[nseq-1].next = NULL;
2352
2353 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
2354
2355 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2356 for( i=0; i<nseq; i++ )
2357 {
2358 hist[i] = -1;
2359 nmemar[i] = 1;
2360 }
2361
2362 reporterr( "\n" );
2363 for( k=0; k<nseq-1; k++ )
2364 {
2365 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
2366
2367 minscore = 999.9;
2368 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
2369 {
2370 i = acpti->pos;
2371 // reporterr( "k=%d i=%d\n", k, i );
2372 if( mindisfrom[i] < minscore ) // muscle
2373 {
2374 im = i;
2375 minscore = mindisfrom[i];
2376 }
2377 }
2378 jm = nearest[im];
2379 if( jm < im )
2380 {
2381 j=jm; jm=im; im=j;
2382 }
2383
2384
2385 prevnode = hist[im];
2386 if( dep ) dep[k].child0 = prevnode;
2387 nmemim = nmemar[im];
2388 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
2389 if( prevnode == -1 )
2390 {
2391 *intpt++ = im;
2392 *intpt = -1;
2393 }
2394 else
2395 {
2396 pt1 = topol[prevnode][0];
2397 pt2 = topol[prevnode][1];
2398 if( *pt1 > *pt2 )
2399 {
2400 pt11 = pt2;
2401 pt22 = pt1;
2402 }
2403 else
2404 {
2405 pt11 = pt1;
2406 pt22 = pt2;
2407 }
2408 for( intpt2=pt11; *intpt2!=-1; )
2409 *intpt++ = *intpt2++;
2410 for( intpt2=pt22; *intpt2!=-1; )
2411 *intpt++ = *intpt2++;
2412 *intpt = -1;
2413 }
2414
2415 prevnode = hist[jm];
2416 if( dep ) dep[k].child1 = prevnode;
2417 nmemjm = nmemar[jm];
2418 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
2419 if( !intpt )
2420 {
2421 reporterr( "Cannot reallocate topol\n" );
2422 exit( 1 );
2423 }
2424 if( prevnode == -1 )
2425 {
2426 *intpt++ = jm;
2427 *intpt = -1;
2428 }
2429 else
2430 {
2431 pt1 = topol[prevnode][0];
2432 pt2 = topol[prevnode][1];
2433 if( *pt1 > *pt2 )
2434 {
2435 pt11 = pt2;
2436 pt22 = pt1;
2437 }
2438 else
2439 {
2440 pt11 = pt1;
2441 pt22 = pt2;
2442 }
2443 for( intpt2=pt11; *intpt2!=-1; )
2444 *intpt++ = *intpt2++;
2445 for( intpt2=pt22; *intpt2!=-1; )
2446 *intpt++ = *intpt2++;
2447 *intpt = -1;
2448 }
2449
2450 minscore *= 0.5;
2451
2452 len[k][0] = ( minscore - tmptmplen[im] );
2453 len[k][1] = ( minscore - tmptmplen[jm] );
2454
2455 if( dep ) dep[k].distfromtip = minscore;
2456 // reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore );
2457
2458 tmptmplen[im] = minscore;
2459
2460 hist[im] = k;
2461 nmemar[im] = nmemim + nmemjm;
2462
2463 mindisfrom[im] = 999.9;
2464 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2465 {
2466 i = acpti->pos;
2467 if( i != im && i != jm )
2468 {
2469 if( i < im )
2470 {
2471 miniim = i;
2472 maxiim = im;
2473 minijm = i;
2474 maxijm = jm;
2475 }
2476 else if( i < jm )
2477 {
2478 miniim = im;
2479 maxiim = i;
2480 minijm = i;
2481 maxijm = jm;
2482 }
2483 else
2484 {
2485 miniim = im;
2486 maxiim = i;
2487 minijm = jm;
2488 maxijm = i;
2489 }
2490 eff0 = eff[miniim][maxiim-miniim];
2491 eff1 = eff[minijm][maxijm-minijm];
2492 #if 0
2493 tmpdouble = eff[miniim][maxiim-miniim] =
2494 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
2495 #else
2496 tmpdouble = eff[miniim][maxiim-miniim] =
2497 (clusterfuncpt[0])( eff0, eff1 );
2498 #endif
2499 if( tmpdouble < mindisfrom[i] )
2500 {
2501 mindisfrom[i] = tmpdouble;
2502 nearest[i] = im;
2503 }
2504 if( tmpdouble < mindisfrom[im] )
2505 {
2506 mindisfrom[im] = tmpdouble;
2507 nearest[im] = i;
2508 }
2509 if( nearest[i] == jm )
2510 {
2511 nearest[i] = im;
2512 }
2513 }
2514 }
2515
2516 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
2517 if( !treetmp )
2518 {
2519 reporterr( "Cannot allocate treetmp\n" );
2520 exit( 1 );
2521 }
2522 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2523 free( tree[im] );
2524 free( tree[jm] );
2525 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
2526 tree[jm] = NULL;
2527 if( tree[im] == NULL )
2528 {
2529 reporterr( "Cannot reallocate tree!\n" );
2530 exit( 1 );
2531 }
2532 strcpy( tree[im], treetmp );
2533
2534 acjmprev = ac[jm].prev;
2535 acjmnext = ac[jm].next;
2536 acjmprev->next = acjmnext;
2537 if( acjmnext != NULL )
2538 acjmnext->prev = acjmprev;
2539 if( efffree )
2540 {
2541 free( (void *)eff[jm] ); eff[jm] = NULL;
2542 }
2543
2544 #if 1 // muscle seems to miss this.
2545 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2546 {
2547 i = acpti->pos;
2548 if( nearest[i] == im )
2549 {
2550 if( i < im )
2551 {
2552 miniim = i;
2553 maxiim = im;
2554 }
2555 else
2556 {
2557 miniim = im;
2558 maxiim = i;
2559 }
2560 if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
2561 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
2562 }
2563 }
2564 #endif
2565
2566
2567 #if 0
2568 reporterr( "\nooSTEP-%03d:\n", k+1 );
2569 reporterr( "len0 = %f\n", len[k][0] );
2570 for( i=0; topol[k][0][i]>-1; i++ ) reporterr( " %03d", topol[k][0][i]+1 );
2571 reporterr( "\n" );
2572 reporterr( "len1 = %f\n", len[k][1] );
2573 for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i]+1 );
2574 reporterr( "\n" );
2575 #endif
2576 }
2577 fp = fopen( "infile.tree", "w" );
2578 fprintf( fp, "%s\n", treetmp );
2579 fclose( fp );
2580
2581 free( tree[0] );
2582 free( tree );
2583 free( treetmp );
2584 free( nametmp );
2585 free( (void *)tmptmplen ); tmptmplen = NULL;
2586 free( hist ); hist = NULL;
2587 free( (char *)ac ); ac = NULL;
2588 free( (void *)nmemar ); nmemar = NULL;
2589 free( mindisfrom );
2590 free( nearest );
2591 }
2592
fixed_musclesupg_double_treeout(int nseq,double ** eff,int *** topol,double ** len,char ** name)2593 void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name )
2594 {
2595 int i, j, k, miniim, maxiim, minijm, maxijm;
2596 int *intpt, *intpt2;
2597 double tmpdouble;
2598 double eff1, eff0;
2599 static double *tmptmplen = NULL;
2600 static int *hist = NULL;
2601 static Bchain *ac = NULL;
2602 int im = -1, jm = -1;
2603 Bchain *acjmnext, *acjmprev;
2604 int prevnode;
2605 Bchain *acpti;
2606 int *pt1, *pt2, *pt11, *pt22;
2607 static int *nmemar;
2608 int nmemim, nmemjm;
2609 double minscore;
2610 int *nearest = NULL; // by D.Mathog, a guess
2611 double *mindisfrom = NULL; // by D.Mathog, a guess
2612 static char **tree;
2613 static char *treetmp;
2614 static char *nametmp, *nameptr, *tmpptr;
2615 FILE *fp;
2616 double (*clusterfuncpt[1])(double,double);
2617 char namec;
2618
2619
2620 sueff1 = 1.0 - sueff_global;
2621 sueff05 = sueff_global * 0.5;
2622 if ( treemethod == 'X' )
2623 clusterfuncpt[0] = cluster_mix_double;
2624 else if ( treemethod == 'E' )
2625 clusterfuncpt[0] = cluster_average_double;
2626 else if ( treemethod == 'q' )
2627 clusterfuncpt[0] = cluster_minimum_double;
2628 else
2629 {
2630 reporterr( "Unknown treemethod, %c\n", treemethod );
2631 exit( 1 );
2632 }
2633
2634
2635
2636
2637
2638 #if 0
2639 if( !hist )
2640 {
2641 hist = AllocateIntVec( njob );
2642 tmptmplen = AllocateDoubleVec( njob );
2643 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
2644 nmemar = AllocateIntVec( njob );
2645 mindisfrom = AllocateDoubleVec( njob );
2646 nearest = AllocateIntVec( njob );
2647 treetmp = AllocateCharVec( njob*150 );
2648 nametmp = AllocateCharVec( 91 );
2649 tree = AllocateCharMtx( njob, njob*150 );
2650 }
2651 for( i=0; i<nseq; i++ )
2652 {
2653 for( j=0; j<90; j++ ) nametmp[j] = 0;
2654 for( j=0; j<90; j++ )
2655 {
2656 if( name[i][j] == 0 )
2657 break;
2658 else if( isalnum( name[i][j] ) )
2659 nametmp[j] = name[i][j];
2660 else
2661 nametmp[j] = '_';
2662 }
2663 nametmp[90] = 0;
2664 // sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
2665 if( outnumber )
2666 nameptr = strstr( nametmp, "_numo_e" ) + 8;
2667 else
2668 nameptr = nametmp + 1;
2669
2670 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
2671
2672 sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr );
2673 }
2674
2675 #else
2676
2677 if( !hist )
2678 {
2679 hist = AllocateIntVec( njob );
2680 tmptmplen = AllocateDoubleVec( njob );
2681 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
2682 nmemar = AllocateIntVec( njob );
2683 mindisfrom = AllocateDoubleVec( njob );
2684 nearest = AllocateIntVec( njob );
2685 // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi?
2686 treetmp = NULL; // kentou 2013/06/12
2687 nametmp = AllocateCharVec( 1000 ); // nagasugi
2688 // tree = AllocateCharMtx( njob, njob*600 );
2689 tree = AllocateCharMtx( njob, 0 );
2690 }
2691
2692
2693 for( i=0; i<nseq; i++ )
2694 {
2695 for( j=0; j<999; j++ ) nametmp[j] = 0;
2696 for( j=0; j<999; j++ )
2697 {
2698 namec = name[i][j];
2699 if( namec == 0 )
2700 break;
2701 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
2702 nametmp[j] = namec;
2703 else
2704 nametmp[j] = '_';
2705 }
2706 nametmp[j] = 0;
2707 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
2708 if( outnumber )
2709 nameptr = strstr( nametmp, "_numo_e" ) + 8;
2710 else
2711 nameptr = nametmp + 1;
2712
2713 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
2714
2715 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
2716 if( tree[i] == NULL )
2717 {
2718 reporterr( "Cannot allocate tree!\n" );
2719 exit( 1 );
2720 }
2721 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
2722 }
2723
2724 #endif
2725
2726
2727
2728
2729
2730
2731
2732
2733 for( i=0; i<nseq; i++ )
2734 {
2735 ac[i].next = ac+i+1;
2736 ac[i].prev = ac+i-1;
2737 ac[i].pos = i;
2738 }
2739 ac[nseq-1].next = NULL;
2740
2741 for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
2742
2743 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
2744 for( i=0; i<nseq; i++ )
2745 {
2746 hist[i] = -1;
2747 nmemar[i] = 1;
2748 }
2749
2750 reporterr( "\n" );
2751 for( k=0; k<nseq-1; k++ )
2752 {
2753 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
2754
2755 minscore = 999.9;
2756 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
2757 {
2758 i = acpti->pos;
2759 // reporterr( "k=%d i=%d\n", k, i );
2760 if( mindisfrom[i] < minscore ) // muscle
2761 {
2762 im = i;
2763 minscore = mindisfrom[i];
2764 }
2765 }
2766 jm = nearest[im];
2767 if( jm < im )
2768 {
2769 j=jm; jm=im; im=j;
2770 }
2771
2772
2773 prevnode = hist[im];
2774 nmemim = nmemar[im];
2775 // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
2776 intpt = topol[k][0];
2777 if( prevnode == -1 )
2778 {
2779 *intpt++ = im;
2780 *intpt = -1;
2781 }
2782 else
2783 {
2784 pt1 = topol[prevnode][0];
2785 pt2 = topol[prevnode][1];
2786 if( *pt1 > *pt2 )
2787 {
2788 pt11 = pt2;
2789 pt22 = pt1;
2790 }
2791 else
2792 {
2793 pt11 = pt1;
2794 pt22 = pt2;
2795 }
2796 for( intpt2=pt11; *intpt2!=-1; )
2797 *intpt++ = *intpt2++;
2798 for( intpt2=pt22; *intpt2!=-1; )
2799 *intpt++ = *intpt2++;
2800 *intpt = -1;
2801 }
2802
2803 prevnode = hist[jm];
2804 nmemjm = nmemar[jm];
2805 // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
2806 intpt = topol[k][1];
2807 if( prevnode == -1 )
2808 {
2809 *intpt++ = jm;
2810 *intpt = -1;
2811 }
2812 else
2813 {
2814 pt1 = topol[prevnode][0];
2815 pt2 = topol[prevnode][1];
2816 if( *pt1 > *pt2 )
2817 {
2818 pt11 = pt2;
2819 pt22 = pt1;
2820 }
2821 else
2822 {
2823 pt11 = pt1;
2824 pt22 = pt2;
2825 }
2826 for( intpt2=pt11; *intpt2!=-1; )
2827 *intpt++ = *intpt2++;
2828 for( intpt2=pt22; *intpt2!=-1; )
2829 *intpt++ = *intpt2++;
2830 *intpt = -1;
2831 }
2832
2833 minscore *= 0.5;
2834
2835 len[k][0] = ( minscore - tmptmplen[im] );
2836 len[k][1] = ( minscore - tmptmplen[jm] );
2837
2838
2839 tmptmplen[im] = minscore;
2840
2841 hist[im] = k;
2842 nmemar[im] = nmemim + nmemjm;
2843
2844 mindisfrom[im] = 999.9;
2845 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2846 {
2847 i = acpti->pos;
2848 if( i != im && i != jm )
2849 {
2850 if( i < im )
2851 {
2852 miniim = i;
2853 maxiim = im;
2854 minijm = i;
2855 maxijm = jm;
2856 }
2857 else if( i < jm )
2858 {
2859 miniim = im;
2860 maxiim = i;
2861 minijm = i;
2862 maxijm = jm;
2863 }
2864 else
2865 {
2866 miniim = im;
2867 maxiim = i;
2868 minijm = jm;
2869 maxijm = i;
2870 }
2871 eff0 = eff[miniim][maxiim];
2872 eff1 = eff[minijm][maxijm];
2873 #if 0
2874 tmpdouble = eff[miniim][maxiim] =
2875 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
2876 #else
2877 tmpdouble = eff[miniim][maxiim] =
2878 (clusterfuncpt[0])( eff0, eff1 );
2879 #endif
2880 if( tmpdouble < mindisfrom[i] )
2881 {
2882 mindisfrom[i] = tmpdouble;
2883 nearest[i] = im;
2884 }
2885 if( tmpdouble < mindisfrom[im] )
2886 {
2887 mindisfrom[im] = tmpdouble;
2888 nearest[im] = i;
2889 }
2890 if( nearest[i] == jm )
2891 {
2892 nearest[i] = im;
2893 }
2894 }
2895 }
2896 #if 0
2897 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2898 strcpy( tree[im], treetmp );
2899 #else
2900 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
2901 if( !treetmp )
2902 {
2903 reporterr( "Cannot allocate treetmp\n" );
2904 exit( 1 );
2905 }
2906 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
2907 free( tree[im] );
2908 free( tree[jm] );
2909 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
2910 tree[jm] = NULL;
2911 if( tree[im] == NULL )
2912 {
2913 reporterr( "Cannot reallocate tree!\n" );
2914 exit( 1 );
2915 }
2916 strcpy( tree[im], treetmp );
2917 #endif
2918
2919 acjmprev = ac[jm].prev;
2920 acjmnext = ac[jm].next;
2921 acjmprev->next = acjmnext;
2922 if( acjmnext != NULL )
2923 acjmnext->prev = acjmprev;
2924 // free( (void *)eff[jm] ); eff[jm] = NULL;
2925
2926 #if 1 // muscle seems to miss this.
2927 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
2928 {
2929 i = acpti->pos;
2930 if( nearest[i] == im )
2931 {
2932 if( i < im )
2933 {
2934 miniim = i;
2935 maxiim = im;
2936 }
2937 else
2938 {
2939 miniim = im;
2940 maxiim = i;
2941 }
2942 if( eff[miniim][maxiim] > mindisfrom[i] )
2943 setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
2944 }
2945 }
2946 #endif
2947
2948
2949 #if 0
2950 fprintf( stdout, "\nvSTEP-%03d:\n", k+1 );
2951 fprintf( stdout, "len0 = %f\n", len[k][0] );
2952 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
2953 fprintf( stdout, "\n" );
2954 fprintf( stdout, "len1 = %f\n", len[k][1] );
2955 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
2956 fprintf( stdout, "\n" );
2957 #endif
2958 }
2959 fp = fopen( "infile.tree", "w" );
2960 fprintf( fp, "%s\n", treetmp );
2961 fclose( fp );
2962 #if 0
2963 FreeCharMtx( tree );
2964 #else
2965 free( tree[0] );
2966 free( tree );
2967 #endif
2968 free( treetmp );
2969 free( nametmp );
2970 free( (void *)tmptmplen ); tmptmplen = NULL;
2971 free( hist ); hist = NULL;
2972 free( (char *)ac ); ac = NULL;
2973 free( (void *)nmemar ); nmemar = NULL;
2974 free( mindisfrom );
2975 free( nearest );
2976 }
2977
fixed_supg_double_treeout_constrained(int nseq,double ** eff,int *** topol,double ** len,char ** name,int ngroup,int ** groups)2978 void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ngroup, int **groups )
2979 {
2980 int i, j, k, miniim, maxiim, minijm, maxijm;
2981 int *intpt, *intpt2;
2982 double tmpdouble;
2983 double eff1, eff0;
2984 static double *tmptmplen = NULL;
2985 static int *hist = NULL;
2986 static Bchain *ac = NULL;
2987 int im = -1, jm = -1;
2988 Bchain *acjmnext, *acjmprev;
2989 int prevnode;
2990 Bchain *acpti, *acptj;
2991 int *pt1, *pt2, *pt11, *pt22;
2992 static int *nmemar;
2993 int nmemim, nmemjm;
2994 double minscore;
2995 int *nearest = NULL; // by D.Mathog, a guess
2996 double *mindisfrom = NULL; // by D.Mathog, a guess
2997 static char **tree;
2998 static char *treetmp;
2999 static char *nametmp, *nameptr, *tmpptr;
3000 FILE *fp;
3001 double (*clusterfuncpt[1])(double,double);
3002 char namec;
3003 int *testtopol, **inconsistent;
3004 int **inconsistentpairlist;
3005 int ninconsistentpairs;
3006 int *warned;
3007 int allinconsistent;
3008 int firsttime;
3009
3010 increaseintergroupdistancesfullmtx( eff, ngroup, groups, nseq );
3011
3012 sueff1 = 1 - sueff_global;
3013 sueff05 = sueff_global * 0.5;
3014 if ( treemethod == 'X' )
3015 clusterfuncpt[0] = cluster_mix_double;
3016 else if ( treemethod == 'E' )
3017 clusterfuncpt[0] = cluster_average_double;
3018 else if ( treemethod == 'q' )
3019 clusterfuncpt[0] = cluster_minimum_double;
3020 else
3021 {
3022 reporterr( "Unknown treemethod, %c\n", treemethod );
3023 exit( 1 );
3024 }
3025
3026
3027
3028
3029
3030 #if 0
3031 if( !hist )
3032 {
3033 hist = AllocateIntVec( njob );
3034 tmptmplen = AllocateDoubleVec( njob );
3035 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
3036 nmemar = AllocateIntVec( njob );
3037 mindisfrom = AllocateDoubleVec( njob );
3038 nearest = AllocateIntVec( njob );
3039 treetmp = AllocateCharVec( njob*150 );
3040 nametmp = AllocateCharVec( 91 );
3041 tree = AllocateCharMtx( njob, njob*150 );
3042 }
3043 for( i=0; i<nseq; i++ )
3044 {
3045 for( j=0; j<90; j++ ) nametmp[j] = 0;
3046 for( j=0; j<90; j++ )
3047 {
3048 if( name[i][j] == 0 )
3049 break;
3050 else if( isalnum( name[i][j] ) )
3051 nametmp[j] = name[i][j];
3052 else
3053 nametmp[j] = '_';
3054 }
3055 nametmp[90] = 0;
3056 // sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
3057 if( outnumber )
3058 nameptr = strstr( nametmp, "_numo_e" ) + 8;
3059 else
3060 nameptr = nametmp + 1;
3061
3062 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
3063
3064 sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr );
3065 }
3066
3067 #else
3068
3069 if( !hist )
3070 {
3071 hist = AllocateIntVec( njob );
3072 tmptmplen = AllocateDoubleVec( njob );
3073 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
3074 nmemar = AllocateIntVec( njob );
3075 mindisfrom = AllocateDoubleVec( njob );
3076 nearest = AllocateIntVec( njob );
3077 // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi?
3078 treetmp = NULL; // kentou 2013/06/12
3079 nametmp = AllocateCharVec( 1000 ); // nagasugi
3080 // tree = AllocateCharMtx( njob, njob*600 );
3081 tree = AllocateCharMtx( njob, 0 );
3082 testtopol = AllocateIntVec( njob + 1 );
3083 inconsistent = AllocateIntMtx( njob, njob ); // muda
3084 inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // muda
3085 warned = AllocateIntVec( ngroup );
3086 }
3087
3088
3089 for( i=0; i<nseq; i++ )
3090 {
3091 for( j=0; j<999; j++ ) nametmp[j] = 0;
3092 for( j=0; j<999; j++ )
3093 {
3094 namec = name[i][j];
3095 if( namec == 0 )
3096 break;
3097 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
3098 nametmp[j] = namec;
3099 else
3100 nametmp[j] = '_';
3101 }
3102 nametmp[j] = 0;
3103 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
3104 if( outnumber )
3105 nameptr = strstr( nametmp, "_numo_e" ) + 8;
3106 else
3107 nameptr = nametmp + 1;
3108
3109 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
3110
3111 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
3112 if( tree[i] == NULL )
3113 {
3114 reporterr( "Cannot allocate tree!\n" );
3115 exit( 1 );
3116 }
3117 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
3118 }
3119
3120 #endif
3121
3122
3123
3124
3125
3126
3127
3128
3129 for( i=0; i<nseq; i++ )
3130 {
3131 ac[i].next = ac+i+1;
3132 ac[i].prev = ac+i-1;
3133 ac[i].pos = i;
3134 }
3135 ac[nseq-1].next = NULL;
3136
3137 for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
3138
3139 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
3140 for( i=0; i<nseq; i++ )
3141 {
3142 hist[i] = -1;
3143 nmemar[i] = 1;
3144 }
3145
3146 reporterr( "\n" );
3147 ninconsistentpairs = 0;
3148 for( k=0; k<nseq-1; k++ )
3149 {
3150 if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
3151
3152
3153
3154 // for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0;
3155 for( i=0; i<ninconsistentpairs; i++ ) inconsistent[inconsistentpairlist[i][0]][inconsistentpairlist[i][1]] = 0;
3156 ninconsistentpairs = 0;
3157 firsttime = 1;
3158 while( 1 )
3159 {
3160 if( firsttime )
3161 {
3162 firsttime = 0;
3163 minscore = 999.9;
3164 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
3165 {
3166 i = acpti->pos;
3167 // reporterr( "k=%d i=%d\n", k, i );
3168 if( mindisfrom[i] < minscore ) // muscle
3169 {
3170 im = i;
3171 minscore = mindisfrom[i];
3172 }
3173 }
3174 jm = nearest[im];
3175 if( jm < im )
3176 {
3177 j=jm; jm=im; im=j;
3178 }
3179 }
3180 else
3181 {
3182 minscore = 999.9;
3183 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
3184 {
3185 i = acpti->pos;
3186 // reporterr( "k=%d i=%d\n", k, i );
3187 for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
3188 {
3189 j = acptj->pos;
3190 if( !inconsistent[i][j] && (tmpdouble=eff[i][j]) < minscore )
3191 {
3192 minscore = tmpdouble;
3193 im = i; jm = j;
3194 }
3195 }
3196 for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next )
3197 {
3198 j = acptj->pos;
3199 if( !inconsistent[j][i] && (tmpdouble=eff[j][i]) < minscore )
3200 {
3201 minscore = tmpdouble;
3202 im = j; jm = i;
3203 }
3204 }
3205 }
3206 }
3207
3208 allinconsistent = 1;
3209 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
3210 {
3211 for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
3212 {
3213 if( inconsistent[acpti->pos][acptj->pos] == 0 )
3214 {
3215 allinconsistent = 0;
3216 goto exitloop_d;
3217 }
3218 }
3219 }
3220 exitloop_d:
3221
3222 if( allinconsistent )
3223 {
3224 reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" );
3225 exit( 1 );
3226 }
3227 #if 1
3228 intpt = testtopol;
3229 prevnode = hist[im];
3230 if( prevnode == -1 )
3231 {
3232 *intpt++ = im;
3233 }
3234 else
3235 {
3236 for( intpt2=topol[prevnode][0]; *intpt2!=-1; )
3237 *intpt++ = *intpt2++;
3238 for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
3239 *intpt++ = *intpt2++;
3240 }
3241
3242 prevnode = hist[jm];
3243 if( prevnode == -1 )
3244 {
3245 *intpt++ = jm;
3246 }
3247 else
3248 {
3249 for( intpt2=topol[prevnode][0]; *intpt2!=-1; )
3250 *intpt++ = *intpt2++;
3251 for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
3252 *intpt++ = *intpt2++;
3253 }
3254 *intpt = -1;
3255 // reporterr( "testtopol = \n" );
3256 // for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 );
3257 // reporterr( "\n" );
3258 #endif
3259 for( i=0; i<ngroup; i++ )
3260 {
3261 // reporterr( "groups[%d] = \n", i );
3262 // for( j=0; groups[i][j]>-1; j++ ) reporterr( " %03d", groups[i][j]+1 );
3263 // reporterr( "\n" );
3264 if( overlapmember( testtopol, groups[i] ) )
3265 {
3266 if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) )
3267 {
3268 if( !warned[i] )
3269 {
3270 warned[i] = 1;
3271 reporterr( "\n###################################################################\n" );
3272 reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 );
3273 reporterr( "###################################################################\n" );
3274 }
3275 inconsistent[im][jm] = 1;
3276 inconsistentpairlist[ninconsistentpairs][0] = im;
3277 inconsistentpairlist[ninconsistentpairs][1] = jm;
3278 ninconsistentpairs++;
3279 break;
3280 }
3281 }
3282 }
3283 if( i == ngroup )
3284 {
3285 // reporterr( "OK\n" );
3286 break;
3287 }
3288 }
3289
3290
3291
3292
3293
3294
3295 prevnode = hist[im];
3296 nmemim = nmemar[im];
3297 // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
3298 intpt = topol[k][0];
3299 if( prevnode == -1 )
3300 {
3301 *intpt++ = im;
3302 *intpt = -1;
3303 }
3304 else
3305 {
3306 pt1 = topol[prevnode][0];
3307 pt2 = topol[prevnode][1];
3308 if( *pt1 > *pt2 )
3309 {
3310 pt11 = pt2;
3311 pt22 = pt1;
3312 }
3313 else
3314 {
3315 pt11 = pt1;
3316 pt22 = pt2;
3317 }
3318 for( intpt2=pt11; *intpt2!=-1; )
3319 *intpt++ = *intpt2++;
3320 for( intpt2=pt22; *intpt2!=-1; )
3321 *intpt++ = *intpt2++;
3322 *intpt = -1;
3323 }
3324
3325 prevnode = hist[jm];
3326 nmemjm = nmemar[jm];
3327 // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
3328 intpt = topol[k][1];
3329 if( prevnode == -1 )
3330 {
3331 *intpt++ = jm;
3332 *intpt = -1;
3333 }
3334 else
3335 {
3336 pt1 = topol[prevnode][0];
3337 pt2 = topol[prevnode][1];
3338 if( *pt1 > *pt2 )
3339 {
3340 pt11 = pt2;
3341 pt22 = pt1;
3342 }
3343 else
3344 {
3345 pt11 = pt1;
3346 pt22 = pt2;
3347 }
3348 for( intpt2=pt11; *intpt2!=-1; )
3349 *intpt++ = *intpt2++;
3350 for( intpt2=pt22; *intpt2!=-1; )
3351 *intpt++ = *intpt2++;
3352 *intpt = -1;
3353 }
3354
3355 minscore *= 0.5;
3356
3357 len[k][0] = ( minscore - tmptmplen[im] );
3358 len[k][1] = ( minscore - tmptmplen[jm] );
3359 if( len[k][0] < 0.0 ) len[k][0] = 0.0;
3360 if( len[k][1] < 0.0 ) len[k][1] = 0.0;
3361
3362
3363 tmptmplen[im] = minscore;
3364
3365 hist[im] = k;
3366 nmemar[im] = nmemim + nmemjm;
3367
3368 mindisfrom[im] = 999.9;
3369 eff[im][jm] = 999.9;
3370 // eff[im][jm-im] = 999.9; // bug??
3371
3372 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
3373 {
3374 i = acpti->pos;
3375 if( i != im && i != jm )
3376 {
3377 if( i < im )
3378 {
3379 miniim = i;
3380 maxiim = im;
3381 minijm = i;
3382 maxijm = jm;
3383 }
3384 else if( i < jm )
3385 {
3386 miniim = im;
3387 maxiim = i;
3388 minijm = i;
3389 maxijm = jm;
3390 }
3391 else
3392 {
3393 miniim = im;
3394 maxiim = i;
3395 minijm = jm;
3396 maxijm = i;
3397 }
3398 eff0 = eff[miniim][maxiim];
3399 eff1 = eff[minijm][maxijm];
3400 #if 0
3401 tmpdouble = eff[miniim][maxiim] =
3402 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
3403 #else
3404 tmpdouble = eff[miniim][maxiim] =
3405 (clusterfuncpt[0])( eff0, eff1 );
3406 #endif
3407
3408 #if 1
3409 if( tmpdouble < mindisfrom[i] )
3410 {
3411 mindisfrom[i] = tmpdouble;
3412 nearest[i] = im;
3413 }
3414 if( tmpdouble < mindisfrom[im] )
3415 {
3416 mindisfrom[im] = tmpdouble;
3417 nearest[im] = i;
3418 }
3419 if( nearest[i] == jm )
3420 {
3421 nearest[i] = im;
3422 }
3423 #endif
3424 }
3425 }
3426 #if 0
3427 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
3428 strcpy( tree[im], treetmp );
3429 #else
3430 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
3431 if( !treetmp )
3432 {
3433 reporterr( "Cannot allocate treetmp\n" );
3434 exit( 1 );
3435 }
3436 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
3437 free( tree[im] );
3438 free( tree[jm] );
3439 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
3440 tree[jm] = NULL;
3441 if( tree[im] == NULL )
3442 {
3443 reporterr( "Cannot reallocate tree!\n" );
3444 exit( 1 );
3445 }
3446 strcpy( tree[im], treetmp );
3447 #endif
3448
3449 acjmprev = ac[jm].prev;
3450 acjmnext = ac[jm].next;
3451 acjmprev->next = acjmnext;
3452 if( acjmnext != NULL )
3453 acjmnext->prev = acjmprev;
3454 // free( (void *)eff[jm] ); eff[jm] = NULL;
3455
3456 #if 1 // muscle seems to miss this.
3457 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
3458 {
3459 i = acpti->pos;
3460 if( nearest[i] == im )
3461 {
3462 if( i < im )
3463 {
3464 miniim = i;
3465 maxiim = im;
3466 }
3467 else
3468 {
3469 miniim = im;
3470 maxiim = i;
3471 }
3472 if( eff[miniim][maxiim] > mindisfrom[i] )
3473 setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
3474 }
3475 }
3476 #endif
3477
3478
3479 #if 0
3480 fprintf( stdout, "\ncSTEP-%03d:\n", k+1 );
3481 fprintf( stdout, "len0 = %f\n", len[k][0] );
3482 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
3483 fprintf( stdout, "\n" );
3484 fprintf( stdout, "len1 = %f\n", len[k][1] );
3485 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
3486 fprintf( stdout, "\n" );
3487 #endif
3488 }
3489 fp = fopen( "infile.tree", "w" );
3490 fprintf( fp, "%s\n", treetmp );
3491 fclose( fp );
3492 #if 0
3493 FreeCharMtx( tree );
3494 #else
3495 free( tree[0] );
3496 free( tree );
3497 #endif
3498 free( treetmp );
3499 free( nametmp );
3500 free( (void *)tmptmplen ); tmptmplen = NULL;
3501 free( hist ); hist = NULL;
3502 free( (char *)ac ); ac = NULL;
3503 free( (void *)nmemar ); nmemar = NULL;
3504 free( mindisfrom );
3505 free( nearest );
3506 free( testtopol );
3507 FreeIntMtx( inconsistent );
3508 FreeIntMtx( inconsistentpairlist );
3509 free( warned );
3510 }
3511
fixed_musclesupg_double_realloc_nobk_halfmtx(int nseq,double ** eff,int *** topol,double ** len,Treedep * dep,int progressout,int efffree)3512 void fixed_musclesupg_double_realloc_nobk_halfmtx( int nseq, double **eff, int ***topol, double **len, Treedep *dep, int progressout, int efffree )
3513 {
3514 int i, j, k, miniim, maxiim, minijm, maxijm;
3515 int *intpt, *intpt2;
3516 double tmpdouble;
3517 double eff1, eff0;
3518 double *tmptmplen = NULL; // static TLS -> local, 2012/02/25
3519 int *hist = NULL; // static TLS -> local, 2012/02/25
3520 Bchain *ac = NULL; // static TLS -> local, 2012/02/25
3521 int im = -1, jm = -1;
3522 Bchain *acjmnext, *acjmprev;
3523 int prevnode;
3524 Bchain *acpti;
3525 int *pt1, *pt2, *pt11, *pt22;
3526 int *nmemar; // static TLS -> local, 2012/02/25
3527 int nmemim, nmemjm;
3528 double minscore;
3529 int *nearest = NULL; // by Mathog, a guess
3530 double *mindisfrom = NULL; // by Mathog, a guess
3531 double (*clusterfuncpt[1])(double,double);
3532
3533
3534 sueff1 = 1 - (double)sueff_global;
3535 sueff05 = (double)sueff_global * 0.5;
3536 if ( treemethod == 'X' )
3537 clusterfuncpt[0] = cluster_mix_double;
3538 else if ( treemethod == 'E' )
3539 clusterfuncpt[0] = cluster_average_double;
3540 else if ( treemethod == 'q' )
3541 clusterfuncpt[0] = cluster_minimum_double;
3542 else
3543 {
3544 reporterr( "Unknown treemethod, %c\n", treemethod );
3545 exit( 1 );
3546 }
3547
3548 if( !hist )
3549 {
3550 hist = AllocateIntVec( njob );
3551 tmptmplen = AllocateFloatVec( njob );
3552 ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
3553 nmemar = AllocateIntVec( njob );
3554 mindisfrom = AllocateFloatVec( njob );
3555 nearest = AllocateIntVec( njob );
3556 }
3557
3558
3559 for( i=0; i<nseq; i++ )
3560 {
3561 ac[i].next = ac+i+1;
3562 ac[i].prev = ac+i-1;
3563 ac[i].pos = i;
3564 }
3565 ac[nseq-1].next = NULL;
3566
3567 for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
3568
3569 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
3570 for( i=0; i<nseq; i++ )
3571 {
3572 hist[i] = -1;
3573 nmemar[i] = 1;
3574 }
3575
3576 if( progressout ) reporterr( "\n" );
3577 for( k=0; k<nseq-1; k++ )
3578 {
3579 if( progressout && k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
3580
3581 minscore = 999.9;
3582 for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
3583 {
3584 i = acpti->pos;
3585 // reporterr( "k=%d i=%d\n", k, i );
3586 if( mindisfrom[i] < minscore ) // muscle
3587 {
3588 im = i;
3589 minscore = mindisfrom[i];
3590 }
3591 }
3592 jm = nearest[im];
3593 if( jm < im )
3594 {
3595 j=jm; jm=im; im=j;
3596 }
3597
3598
3599 prevnode = hist[im];
3600 if( dep ) dep[k].child0 = prevnode;
3601 nmemim = nmemar[im];
3602 intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
3603 if( prevnode == -1 )
3604 {
3605 *intpt++ = im;
3606 *intpt = -1;
3607 }
3608 else
3609 {
3610 pt1 = topol[prevnode][0];
3611 pt2 = topol[prevnode][1];
3612 if( *pt1 > *pt2 )
3613 {
3614 pt11 = pt2;
3615 pt22 = pt1;
3616 }
3617 else
3618 {
3619 pt11 = pt1;
3620 pt22 = pt2;
3621 }
3622 for( intpt2=pt11; *intpt2!=-1; )
3623 *intpt++ = *intpt2++;
3624 for( intpt2=pt22; *intpt2!=-1; )
3625 *intpt++ = *intpt2++;
3626 *intpt = -1;
3627 }
3628
3629 prevnode = hist[jm];
3630 if( dep ) dep[k].child1 = prevnode;
3631 nmemjm = nmemar[jm];
3632 intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
3633 if( !intpt )
3634 {
3635 reporterr( "Cannot reallocate topol\n" );
3636 exit( 1 );
3637 }
3638 if( prevnode == -1 )
3639 {
3640 *intpt++ = jm;
3641 *intpt = -1;
3642 }
3643 else
3644 {
3645 pt1 = topol[prevnode][0];
3646 pt2 = topol[prevnode][1];
3647 if( *pt1 > *pt2 )
3648 {
3649 pt11 = pt2;
3650 pt22 = pt1;
3651 }
3652 else
3653 {
3654 pt11 = pt1;
3655 pt22 = pt2;
3656 }
3657 for( intpt2=pt11; *intpt2!=-1; )
3658 *intpt++ = *intpt2++;
3659 for( intpt2=pt22; *intpt2!=-1; )
3660 *intpt++ = *intpt2++;
3661 *intpt = -1;
3662 }
3663
3664 minscore *= 0.5;
3665
3666 len[k][0] = ( minscore - tmptmplen[im] );
3667 len[k][1] = ( minscore - tmptmplen[jm] );
3668
3669 if( dep ) dep[k].distfromtip = minscore;
3670
3671 tmptmplen[im] = minscore;
3672
3673 hist[im] = k;
3674 nmemar[im] = nmemim + nmemjm;
3675
3676 mindisfrom[im] = 999.9;
3677 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
3678 {
3679 i = acpti->pos;
3680 if( i != im && i != jm )
3681 {
3682 if( i < im )
3683 {
3684 miniim = i;
3685 maxiim = im;
3686 minijm = i;
3687 maxijm = jm;
3688 }
3689 else if( i < jm )
3690 {
3691 miniim = im;
3692 maxiim = i;
3693 minijm = i;
3694 maxijm = jm;
3695 }
3696 else
3697 {
3698 miniim = im;
3699 maxiim = i;
3700 minijm = jm;
3701 maxijm = i;
3702 }
3703 eff0 = eff[miniim][maxiim-miniim];
3704 eff1 = eff[minijm][maxijm-minijm];
3705 tmpdouble = eff[miniim][maxiim-miniim] =
3706 #if 0
3707 MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
3708 #else
3709 (clusterfuncpt[0])( eff0, eff1 );
3710 #endif
3711 if( tmpdouble < mindisfrom[i] )
3712 {
3713 mindisfrom[i] = tmpdouble;
3714 nearest[i] = im;
3715 }
3716 if( tmpdouble < mindisfrom[im] )
3717 {
3718 mindisfrom[im] = tmpdouble;
3719 nearest[im] = i;
3720 }
3721 if( nearest[i] == jm )
3722 {
3723 nearest[i] = im;
3724 }
3725 }
3726 }
3727
3728 // reporterr( "im,jm=%d,%d\n", im, jm );
3729 acjmprev = ac[jm].prev;
3730 acjmnext = ac[jm].next;
3731 acjmprev->next = acjmnext;
3732 if( acjmnext != NULL )
3733 acjmnext->prev = acjmprev;
3734 if( efffree )
3735 {
3736 free( (void *)eff[jm] ); eff[jm] = NULL;
3737 }
3738
3739 #if 1 // muscle seems to miss this.
3740 for( acpti=ac; acpti!=NULL; acpti=acpti->next )
3741 {
3742 i = acpti->pos;
3743 if( nearest[i] == im )
3744 {
3745 if( i < im )
3746 {
3747 miniim = i;
3748 maxiim = im;
3749 }
3750 else
3751 {
3752 miniim = im;
3753 maxiim = i;
3754 }
3755 if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
3756 setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
3757 }
3758 }
3759 #endif
3760
3761
3762 #if 0
3763 fprintf( stdout, "vSTEP-%03d:\n", k+1 );
3764 fprintf( stdout, "len0 = %f\n", len[k][0] );
3765 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
3766 fprintf( stdout, "\n" );
3767 fprintf( stdout, "len1 = %f\n", len[k][1] );
3768 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
3769 fprintf( stdout, "\n" );
3770 #endif
3771 }
3772 free( (void *)tmptmplen ); tmptmplen = NULL;
3773 free( hist ); hist = NULL;
3774 free( (char *)ac ); ac = NULL;
3775 free( (void *)nmemar ); nmemar = NULL;
3776 free( mindisfrom );
3777 free( nearest );
3778 }
3779
3780
3781
3782
3783
3784
3785
3786
3787
veryfastsupg_double_loadtree(int nseq,double ** eff,int *** topol,double ** len,char ** name)3788 void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len, char **name )
3789 {
3790 int i, j, k, miniim, maxiim, minijm, maxijm;
3791 int *intpt, *intpt2;
3792 double eff1, eff0;
3793 int *hist = NULL;
3794 Achain *ac = NULL;
3795 double minscore;
3796 char **tree;
3797 char *treetmp;
3798 int im = -1, jm = -1;
3799 int prevnode, acjmnext, acjmprev;
3800 int *pt1, *pt2, *pt11, *pt22;
3801 FILE *fp;
3802 int node[2];
3803 double lenfl[2];
3804 char *nametmp, *nameptr, *tmpptr; //static?
3805 char namec;
3806
3807 fp = fopen( "_guidetree", "r" );
3808 if( !fp )
3809 {
3810 reporterr( "cannot open _guidetree\n" );
3811 exit( 1 );
3812 }
3813
3814
3815 if( !hist )
3816 {
3817 // treetmp = AllocateCharVec( njob*50 );
3818 treetmp = NULL;
3819 // tree = AllocateCharMtx( njob, njob*50 );
3820 tree = AllocateCharMtx( njob, 0 );
3821 nametmp = AllocateCharVec( 1000 ); // nagasugi
3822 hist = AllocateIntVec( njob );
3823 ac = (Achain *)malloc( njob * sizeof( Achain ) );
3824 }
3825
3826 for( i=0; i<nseq; i++ )
3827 {
3828 for( j=0; j<999; j++ ) nametmp[j] = 0;
3829 for( j=0; j<999; j++ )
3830 {
3831 namec = name[i][j];
3832 if( namec == 0 )
3833 break;
3834 else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
3835 nametmp[j] = namec;
3836 else
3837 nametmp[j] = '_';
3838 }
3839 nametmp[j] = 0;
3840 // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
3841 if( outnumber )
3842 nameptr = strstr( nametmp, "_numo_e" ) + 8;
3843 else
3844 nameptr = nametmp + 1;
3845
3846 if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
3847
3848 tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
3849 if( tree[i] == NULL )
3850 {
3851 reporterr( "Cannot allocate tree!\n" );
3852 exit( 1 );
3853 }
3854 sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
3855 }
3856
3857 for( i=0; i<nseq; i++ )
3858 {
3859 ac[i].next = i+1;
3860 ac[i].prev = i-1;
3861 // ac[i].curr = i;
3862 }
3863 ac[nseq-1].next = -1;
3864
3865 for( i=0; i<nseq; i++ ) hist[i] = -1;
3866
3867 reporterr( "\n" );
3868 for( k=0; k<nseq-1; k++ )
3869 {
3870 if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
3871
3872 #if 0
3873 minscore = 99999.9;
3874 for( i=0; ac[i].next!=-1; i=ac[i].next )
3875 {
3876 for( j=ac[i].next; j!=-1; j=ac[j].next )
3877 {
3878 tmpdouble = eff[i][j];
3879 if( tmpdouble < minscore )
3880 {
3881 minscore = tmpdouble;
3882 im = i; jm = j;
3883 }
3884 }
3885 }
3886 #else
3887 lenfl[0] = lenfl[1] = -1.0;
3888 loadtreeoneline( node, lenfl, fp );
3889 im = node[0];
3890 jm = node[1];
3891 minscore = eff[im][jm];
3892
3893 if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL )
3894 {
3895 reporterr( "\n\nCheck the guide tree.\n" );
3896 reporterr( "im=%d, jm=%d\n", im+1, jm+1 );
3897 reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
3898 exit( 1 );
3899 }
3900
3901
3902 // reporterr( "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
3903
3904
3905 if( lenfl[0] == -1.0 || lenfl[1] == -1.0 )
3906 {
3907 reporterr( "\n\nWARNING: Branch length is not given.\n" );
3908 exit( 1 );
3909 }
3910
3911 if( lenfl[0] < 0.0 ) lenfl[0] = 0.0;
3912 if( lenfl[1] < 0.0 ) lenfl[1] = 0.0;
3913 #endif
3914
3915 // reporterr( "im=%d, jm=%d\n", im, jm );
3916
3917 intpt = topol[k][0];
3918 prevnode = hist[im];
3919 if( prevnode == -1 )
3920 {
3921 *intpt++ = im;
3922 *intpt = -1;
3923 }
3924 else
3925 {
3926 pt1 = topol[prevnode][0];
3927 pt2 = topol[prevnode][1];
3928 if( *pt1 > *pt2 )
3929 {
3930 pt11 = pt2;
3931 pt22 = pt1;
3932 }
3933 else
3934 {
3935 pt11 = pt1;
3936 pt22 = pt2;
3937 }
3938 for( intpt2=pt11; *intpt2!=-1; )
3939 *intpt++ = *intpt2++;
3940 for( intpt2=pt22; *intpt2!=-1; )
3941 *intpt++ = *intpt2++;
3942 *intpt = -1;
3943 }
3944
3945 intpt = topol[k][1];
3946 prevnode = hist[jm];
3947 if( prevnode == -1 )
3948 {
3949 *intpt++ = jm;
3950 *intpt = -1;
3951 }
3952 else
3953 {
3954 pt1 = topol[prevnode][0];
3955 pt2 = topol[prevnode][1];
3956 if( *pt1 > *pt2 )
3957 {
3958 pt11 = pt2;
3959 pt22 = pt1;
3960 }
3961 else
3962 {
3963 pt11 = pt1;
3964 pt22 = pt2;
3965 }
3966 for( intpt2=pt11; *intpt2!=-1; )
3967 *intpt++ = *intpt2++;
3968 for( intpt2=pt22; *intpt2!=-1; )
3969 *intpt++ = *intpt2++;
3970 *intpt = -1;
3971 }
3972
3973 minscore *= 0.5;
3974
3975 #if 0
3976 len[k][0] = minscore - tmptmplen[im];
3977 len[k][1] = minscore - tmptmplen[jm];
3978 #else
3979 len[k][0] = lenfl[0];
3980 len[k][1] = lenfl[1];
3981 #endif
3982
3983
3984 hist[im] = k;
3985
3986 for( i=0; i!=-1; i=ac[i].next )
3987 {
3988 if( i != im && i != jm )
3989 {
3990 if( i < im )
3991 {
3992 miniim = i;
3993 maxiim = im;
3994 minijm = i;
3995 maxijm = jm;
3996 }
3997 else if( i < jm )
3998 {
3999 miniim = im;
4000 maxiim = i;
4001 minijm = i;
4002 maxijm = jm;
4003 }
4004 else
4005 {
4006 miniim = im;
4007 maxiim = i;
4008 minijm = jm;
4009 maxijm = i;
4010 }
4011 eff0 = eff[miniim][maxiim];
4012 eff1 = eff[minijm][maxijm];
4013 eff[miniim][maxiim] =
4014 MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
4015 ( eff0 + eff1 ) * 0.5 * sueff_global;
4016 }
4017 }
4018 acjmprev = ac[jm].prev;
4019 acjmnext = ac[jm].next;
4020 ac[acjmprev].next = acjmnext;
4021 if( acjmnext != -1 )
4022 ac[acjmnext].prev = acjmprev;
4023
4024
4025 treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
4026 if( !treetmp )
4027 {
4028 reporterr( "Cannot allocate treetmp\n" );
4029 exit( 1 );
4030 }
4031 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
4032 free( tree[im] );
4033 free( tree[jm] );
4034 tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
4035 tree[jm] = NULL;
4036 if( tree[im] == NULL )
4037 {
4038 reporterr( "Cannot reallocate tree!\n" );
4039 exit( 1 );
4040 }
4041 strcpy( tree[im], treetmp );
4042
4043 // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
4044 // strcpy( tree[im], treetmp );
4045
4046 #if 0
4047 fprintf( stdout, "STEP-%03d:\n", k+1 );
4048 fprintf( stdout, "len0 = %f\n", len[k][0] );
4049 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
4050 fprintf( stdout, "\n" );
4051 fprintf( stdout, "len1 = %f\n", len[k][1] );
4052 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
4053 fprintf( stdout, "\n" );
4054 #endif
4055 }
4056 fclose( fp );
4057
4058
4059 fp = fopen( "infile.tree", "w" );
4060 fprintf( fp, "%s\n", treetmp );
4061 // fprintf( fp, "by veryfastsupg_double_loadtree\n" );
4062 fclose( fp );
4063
4064 #if 1
4065 reporterr( "\n" );
4066 free( hist );
4067 free( (char *)ac );
4068 FreeCharMtx( tree );
4069 free( treetmp );
4070 free( nametmp );
4071 #endif
4072
4073 #if 0
4074 // reporterr( "reconstructing eff[][]\n" ); // Tsune ni hat2 ha aru node koreha iranai.
4075 for( k=0; k<nseq; k++ ) for( i=0; i<nseq; i++ ) eff[i][k] = 0.0;
4076 for( k=0; k<nseq-1; k++ )
4077 {
4078 reporterr( "len[k][0], len[k][1] = %f, %f\n", len[k][0], len[k][1] );
4079 for( i=0; (im=topol[k][0][i])>-1; i++ )
4080 {
4081 reporterr( " %03d", im );
4082 }
4083 fprintf( stdout, "\n" );
4084 for( i=0; (jm=topol[k][1][i])>-1; i++ )
4085 {
4086 reporterr( " %03d", jm );
4087 }
4088 for( i=0; (im=topol[k][0][i])>-1; i++ ) for( j=0; (jm=topol[k][1][j])>-1; j++ )
4089 {
4090 eff[im][jm] += len[k][0] + len[k][1];
4091 eff[jm][im] += len[k][0] + len[k][1];
4092 }
4093 }
4094 #endif
4095 }
4096
#if 0
/*
 * veryfastsupg_double -- compiled out by the surrounding "#if 0".
 *
 * For each of the nseq-1 steps this routine
 *   - scans the chain of still-active indices for the pair (im, jm) with the
 *     smallest eff[i][j],
 *   - writes the -1 terminated member lists of both clusters to topol[k][0]
 *     and topol[k][1] (a single index if the entry was never merged,
 *     otherwise both halves of the step stored in hist[], the half with the
 *     smaller first member first),
 *   - sets len[k][] to half the distance minus the value kept in tmptmplen[],
 *   - overwrites eff[][] for the surviving entry im with
 *     MIN*(1-sueff_global) + average*sueff_global,
 *   - unlinks jm from the chain.
 * The static work arrays are allocated on first use with njob elements and
 * released again before returning.
 */
void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
{
    int i, j, k, miniim, maxiim, minijm, maxijm;
	int *intpt, *intpt2;
	double tmpdouble;
	double eff1, eff0;
	static double *tmptmplen = NULL;
    static int *hist = NULL;
	static Achain *ac = NULL;
	double minscore;
	int im = -1, jm = -1;
	int prevnode, acjmnext, acjmprev;
	int *pt1, *pt2, *pt11, *pt22;
	if( !hist )
	{
		hist = AllocateIntVec( njob );
		tmptmplen = (double *)malloc( njob * sizeof( double ) );
		ac = (Achain *)malloc( njob * sizeof( Achain ) );
	}
	
	for( i=0; i<nseq; i++ )
	{
		ac[i].next = i+1;
		ac[i].prev = i-1;
//		ac[i].curr = i;
	}
	ac[nseq-1].next = -1;

	for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
    for( i=0; i<nseq; i++ ) hist[i] = -1;

	reporterr(       "\n" );
    for( k=0; k<nseq-1; k++ )
    {
		if( k % 10 == 0 ) reporterr(       "%d / %d\r", k, nseq );

		minscore = 99999.9;
		for( i=0; ac[i].next!=-1; i=ac[i].next ) 
		{
			for( j=ac[i].next; j!=-1; j=ac[j].next )
	        {
				tmpdouble = eff[i][j];
				if( tmpdouble < minscore )
				{
					minscore = tmpdouble;
					im = i; jm = j;
				}
			}
		}

//		reporterr(       "im=%d, jm=%d\n", im, jm );

		intpt = topol[k][0];
		prevnode = hist[im];
		if( prevnode == -1 )
		{
			*intpt++ = im;
			*intpt = -1;
		}
		else
		{
			pt1 = topol[prevnode][0];
			pt2 = topol[prevnode][1];
			if( *pt1 > *pt2 )
			{
				pt11 = pt2;
				pt22 = pt1;
			}
			else
			{
				pt11 = pt1;
				pt22 = pt2;
			}
			for( intpt2=pt11; *intpt2!=-1; )
				*intpt++ = *intpt2++;
			for( intpt2=pt22; *intpt2!=-1; )
				*intpt++ = *intpt2++;
			*intpt = -1;
		}

		intpt = topol[k][1];
		prevnode = hist[jm];
		if( prevnode == -1 )
		{
			*intpt++ = jm;
			*intpt = -1;
		}
		else
		{
			pt1 = topol[prevnode][0];
			pt2 = topol[prevnode][1];
			if( *pt1 > *pt2 )
			{
				pt11 = pt2;
				pt22 = pt1;
			}
			else
			{
				pt11 = pt1;
				pt22 = pt2;
			}
			for( intpt2=pt11; *intpt2!=-1; )
				*intpt++ = *intpt2++;
			for( intpt2=pt22; *intpt2!=-1; )
				*intpt++ = *intpt2++;
			*intpt = -1;
		}

		minscore *= 0.5;

		len[k][0] = minscore - tmptmplen[im];
		len[k][1] = minscore - tmptmplen[jm];

		tmptmplen[im] = minscore;

		hist[im] = k;

		for( i=0; i!=-1; i=ac[i].next )
        {
            if( i != im && i != jm )
            {
				if( i < im )
				{
					 miniim = i;
					 maxiim = im;
					 minijm = i;
					 maxijm = jm;
				}
				else if( i < jm )
				{
					 miniim = im;
					 maxiim = i;
					 minijm = i;
					 maxijm = jm;
				}
				else
				{
					 miniim = im;
					 maxiim = i;
					 minijm = jm;
					 maxijm = i;
				}
				eff0 = eff[miniim][maxiim];
				eff1 = eff[minijm][maxijm];
                eff[miniim][maxiim] =
                MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
				( eff0 + eff1 ) * 0.5 * sueff_global;
            }
        }
		acjmprev = ac[jm].prev; 
		acjmnext = ac[jm].next; 
		ac[acjmprev].next = acjmnext;
		if( acjmnext != -1 )
			ac[acjmnext].prev = acjmprev;
#if 0
        fprintf( stdout, "STEP-%03d:\n", k+1 );
		fprintf( stdout, "len0 = %f\n", len[k][0] );
        for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
        fprintf( stdout, "\n" );
		fprintf( stdout, "len1 = %f\n", len[k][1] );
        for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
        fprintf( stdout, "\n" );
#endif
    }
#if 1
	reporterr(       "\n" );
	free( (void *)tmptmplen ); tmptmplen = NULL;
	free( hist ); hist = NULL;
	free( (char *)ac ); ac = NULL;
#endif
}
#endif
4270
veryfastsupg_double_outtree(int nseq,double ** eff,int *** topol,double ** len,char ** name)4271 void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) // not used
4272 {
4273 int i, j, k, miniim, maxiim, minijm, maxijm;
4274 int *intpt, *intpt2;
4275 double tmpdouble;
4276 double eff1, eff0;
4277 static double *tmptmplen = NULL;
4278 static int *hist = NULL;
4279 static Achain *ac = NULL;
4280 double minscore;
4281 static char **tree;
4282 static char *treetmp;
4283 static char *nametmp;
4284 FILE *fpout;
4285 int im = -1, jm = -1;
4286 int prevnode, acjmnext, acjmprev;
4287 int *pt1, *pt2, *pt11, *pt22;
4288 double (*clusterfuncpt[1])(double,double);
4289
4290
4291 sueff1 = 1 - sueff_global;
4292 sueff05 = sueff_global * 0.5;
4293 if ( treemethod == 'X' )
4294 clusterfuncpt[0] = cluster_mix_double;
4295 else if ( treemethod == 'E' )
4296 clusterfuncpt[0] = cluster_average_double;
4297 else if ( treemethod == 'q' )
4298 clusterfuncpt[0] = cluster_minimum_double;
4299 else
4300 {
4301 reporterr( "Unknown treemethod, %c\n", treemethod );
4302 exit( 1 );
4303 }
4304
4305 if( !hist )
4306 {
4307 treetmp = AllocateCharVec( njob*50 );
4308 tree = AllocateCharMtx( njob, njob*50 );
4309 hist = AllocateIntVec( njob );
4310 tmptmplen = (double *)malloc( njob * sizeof( double ) );
4311 ac = (Achain *)malloc( njob * sizeof( Achain ) );
4312 nametmp = AllocateCharVec( 31 );
4313 }
4314
4315 // for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
4316 for( i=0; i<nseq; i++ )
4317 {
4318 for( j=0; j<30; j++ ) nametmp[j] = 0;
4319 for( j=0; j<30; j++ )
4320 {
4321 if( isalnum( name[i][j] ) )
4322 nametmp[j] = name[i][j];
4323 else
4324 nametmp[j] = '_';
4325 }
4326 nametmp[30] = 0;
4327 sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
4328 }
4329
4330 for( i=0; i<nseq; i++ )
4331 {
4332 ac[i].next = i+1;
4333 ac[i].prev = i-1;
4334 // ac[i].curr = i;
4335 }
4336 ac[nseq-1].next = -1;
4337
4338 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
4339 for( i=0; i<nseq; i++ ) hist[i] = -1;
4340
4341 reporterr( "\n" );
4342 for( k=0; k<nseq-1; k++ )
4343 {
4344 if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
4345
4346 minscore = 99999.9;
4347 for( i=0; ac[i].next!=-1; i=ac[i].next )
4348 {
4349 for( j=ac[i].next; j!=-1; j=ac[j].next )
4350 {
4351 tmpdouble = eff[i][j];
4352 if( tmpdouble < minscore )
4353 {
4354 minscore = tmpdouble;
4355 im = i; jm = j;
4356 }
4357 }
4358 }
4359
4360 // reporterr( "im=%d, jm=%d\n", im, jm );
4361
4362 intpt = topol[k][0];
4363 prevnode = hist[im];
4364 if( prevnode == -1 )
4365 {
4366 *intpt++ = im;
4367 *intpt = -1;
4368 }
4369 else
4370 {
4371 pt1 = topol[prevnode][0];
4372 pt2 = topol[prevnode][1];
4373 if( *pt1 > *pt2 )
4374 {
4375 pt11 = pt2;
4376 pt22 = pt1;
4377 }
4378 else
4379 {
4380 pt11 = pt1;
4381 pt22 = pt2;
4382 }
4383 for( intpt2=pt11; *intpt2!=-1; )
4384 *intpt++ = *intpt2++;
4385 for( intpt2=pt22; *intpt2!=-1; )
4386 *intpt++ = *intpt2++;
4387 *intpt = -1;
4388 }
4389
4390 intpt = topol[k][1];
4391 prevnode = hist[jm];
4392 if( prevnode == -1 )
4393 {
4394 *intpt++ = jm;
4395 *intpt = -1;
4396 }
4397 else
4398 {
4399 pt1 = topol[prevnode][0];
4400 pt2 = topol[prevnode][1];
4401 if( *pt1 > *pt2 )
4402 {
4403 pt11 = pt2;
4404 pt22 = pt1;
4405 }
4406 else
4407 {
4408 pt11 = pt1;
4409 pt22 = pt2;
4410 }
4411 for( intpt2=pt11; *intpt2!=-1; )
4412 *intpt++ = *intpt2++;
4413 for( intpt2=pt22; *intpt2!=-1; )
4414 *intpt++ = *intpt2++;
4415 *intpt = -1;
4416 }
4417
4418 minscore *= 0.5;
4419
4420 len[k][0] = minscore - tmptmplen[im];
4421 len[k][1] = minscore - tmptmplen[jm];
4422
4423 tmptmplen[im] = minscore;
4424
4425 hist[im] = k;
4426
4427 for( i=0; i!=-1; i=ac[i].next )
4428 {
4429 if( i != im && i != jm )
4430 {
4431 if( i < im )
4432 {
4433 miniim = i;
4434 maxiim = im;
4435 minijm = i;
4436 maxijm = jm;
4437 }
4438 else if( i < jm )
4439 {
4440 miniim = im;
4441 maxiim = i;
4442 minijm = i;
4443 maxijm = jm;
4444 }
4445 else
4446 {
4447 miniim = im;
4448 maxiim = i;
4449 minijm = jm;
4450 maxijm = i;
4451 }
4452 eff0 = eff[miniim][maxiim];
4453 eff1 = eff[minijm][maxijm];
4454 eff[miniim][maxiim] =
4455 (clusterfuncpt[0])( eff0, eff1 );
4456 }
4457 }
4458 acjmprev = ac[jm].prev;
4459 acjmnext = ac[jm].next;
4460 ac[acjmprev].next = acjmnext;
4461 if( acjmnext != -1 )
4462 ac[acjmnext].prev = acjmprev;
4463
4464 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
4465 strcpy( tree[im], treetmp );
4466 #if 0
4467 fprintf( stdout, "STEP-%03d:\n", k+1 );
4468 fprintf( stdout, "len0 = %f\n", len[k][0] );
4469 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
4470 fprintf( stdout, "\n" );
4471 fprintf( stdout, "len1 = %f\n", len[k][1] );
4472 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
4473 fprintf( stdout, "\n" );
4474 #endif
4475 }
4476 fpout = fopen( "infile.tree", "w" );
4477 fprintf( fpout, "%s\n", treetmp );
4478 // fprintf( fpout, "by veryfastsupg_double_outtree\n" );
4479 fclose( fpout );
4480 #if 1
4481 reporterr( "\n" );
4482 free( (void *)tmptmplen ); tmptmplen = NULL;
4483 free( hist ); hist = NULL;
4484 free( (char *)ac ); ac = NULL;
4485 FreeCharMtx( tree );
4486 free( treetmp );
4487 free( nametmp );
4488 #endif
4489 }
4490
veryfastsupg(int nseq,double ** oeff,int *** topol,double ** len)4491 void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
4492 {
4493 int i, j, k, miniim, maxiim, minijm, maxijm;
4494 int *intpt, *intpt2;
4495 int tmpint;
4496 int eff1, eff0;
4497 static double *tmptmplen = NULL;
4498 static int **eff = NULL;
4499 static int *hist = NULL;
4500 static Achain *ac = NULL;
4501 int minscore;
4502 double minscoref;
4503 int im = -1, jm = -1;
4504 int prevnode, acjmnext, acjmprev;
4505 int *pt1, *pt2, *pt11, *pt22;
4506 if( !eff )
4507 {
4508 eff = AllocateIntMtx( njob, njob );
4509 hist = AllocateIntVec( njob );
4510 tmptmplen = (double *)malloc( njob * sizeof( double ) );
4511 ac = (Achain *)malloc( njob * sizeof( Achain ) );
4512 }
4513
4514 for( i=0; i<nseq; i++ )
4515 {
4516 for( j=0; j<nseq; j++ )
4517 {
4518 eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 );
4519 }
4520 }
4521
4522 for( i=0; i<nseq; i++ )
4523 {
4524 ac[i].next = i+1;
4525 ac[i].prev = i-1;
4526 // ac[i].curr = i;
4527 }
4528 ac[nseq-1].next = -1;
4529
4530 for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
4531 for( i=0; i<nseq; i++ ) hist[i] = -1;
4532
4533 reporterr( "\n" );
4534 for( k=0; k<nseq-1; k++ )
4535 {
4536 if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
4537
4538 minscore = INTMTXSCALE*4;
4539 for( i=0; ac[i].next!=-1; i=ac[i].next )
4540 {
4541 for( j=ac[i].next; j!=-1; j=ac[j].next )
4542 {
4543 tmpint = eff[i][j];
4544 if( tmpint < minscore )
4545 {
4546 minscore = tmpint;
4547 im = i; jm = j;
4548 }
4549 }
4550 }
4551 minscoref = (double)minscore * 0.5 / ( INTMTXSCALE );
4552
4553 // reporterr( "im=%d, jm=%d\n", im, jm );
4554
4555 #if 1
4556 intpt = topol[k][0];
4557 prevnode = hist[im];
4558 if( prevnode == -1 )
4559 {
4560 *intpt++ = im;
4561 *intpt = -1;
4562 }
4563 else
4564 {
4565 pt1 = topol[prevnode][0];
4566 pt2 = topol[prevnode][1];
4567 if( *pt1 > *pt2 )
4568 {
4569 pt11 = pt2;
4570 pt22 = pt1;
4571 }
4572 else
4573 {
4574 pt11 = pt1;
4575 pt22 = pt2;
4576 }
4577 for( intpt2=pt11; *intpt2!=-1; )
4578 *intpt++ = *intpt2++;
4579 for( intpt2=pt22; *intpt2!=-1; )
4580 *intpt++ = *intpt2++;
4581 *intpt = -1;
4582 }
4583
4584 intpt = topol[k][1];
4585 prevnode = hist[jm];
4586 if( prevnode == -1 )
4587 {
4588 *intpt++ = jm;
4589 *intpt = -1;
4590 }
4591 else
4592 {
4593 pt1 = topol[prevnode][0];
4594 pt2 = topol[prevnode][1];
4595 if( *pt1 > *pt2 )
4596 {
4597 pt11 = pt2;
4598 pt22 = pt1;
4599 }
4600 else
4601 {
4602 pt11 = pt1;
4603 pt22 = pt2;
4604 }
4605 for( intpt2=pt11; *intpt2!=-1; )
4606 *intpt++ = *intpt2++;
4607 for( intpt2=pt22; *intpt2!=-1; )
4608 *intpt++ = *intpt2++;
4609 *intpt = -1;
4610 }
4611 #else
4612 intpt = topol[k][0];
4613 for( i=0; i<nseq; i++ )
4614 if( pair[im][i] > -2 )
4615 *intpt++ = i;
4616 *intpt = -1;
4617
4618 intpt = topol[k][1];
4619 for( i=0; i<nseq; i++ )
4620 if( pair[jm][i] > -2 )
4621 *intpt++ = i;
4622 *intpt = -1;
4623 #endif
4624
4625 len[k][0] = minscoref - tmptmplen[im];
4626 len[k][1] = minscoref - tmptmplen[jm];
4627
4628 tmptmplen[im] = minscoref;
4629
4630 hist[im] = k;
4631
4632 for( i=0; i!=-1; i=ac[i].next )
4633 {
4634 if( i != im && i != jm )
4635 {
4636 if( i < im )
4637 {
4638 miniim = i;
4639 maxiim = im;
4640 minijm = i;
4641 maxijm = jm;
4642 }
4643 else if( i < jm )
4644 {
4645 miniim = im;
4646 maxiim = i;
4647 minijm = i;
4648 maxijm = jm;
4649 }
4650 else
4651 {
4652 miniim = im;
4653 maxiim = i;
4654 minijm = jm;
4655 maxijm = i;
4656 }
4657 eff0 = eff[miniim][maxiim];
4658 eff1 = eff[minijm][maxijm];
4659 eff[miniim][maxiim] =
4660 MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + // int??
4661 ( eff0 + eff1 ) * 0.5 * sueff_global; // int??
4662 }
4663 }
4664 acjmprev = ac[jm].prev;
4665 acjmnext = ac[jm].next;
4666 ac[acjmprev].next = acjmnext;
4667 if( acjmnext != -1 )
4668 ac[acjmnext].prev = acjmprev;
4669 #if 0
4670 fprintf( stdout, "STEP-%03d:\n", k+1 );
4671 fprintf( stdout, "len0 = %f\n", len[k][0] );
4672 for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
4673 fprintf( stdout, "\n" );
4674 fprintf( stdout, "len1 = %f\n", len[k][1] );
4675 for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
4676 fprintf( stdout, "\n" );
4677 #endif
4678 }
4679 #if 1
4680 FreeIntMtx( eff ); eff = NULL;
4681 free( (void *)tmptmplen ); tmptmplen = NULL;
4682 free( hist ); hist = NULL;
4683 free( (char *)ac ); ac = NULL;
4684 #endif
4685 }
4686
fastsupg(int nseq,double ** oeff,int *** topol,double ** len)4687 void fastsupg( int nseq, double **oeff, int ***topol, double **len )
4688 {
4689 int i, j, k, miniim, maxiim, minijm, maxijm;
4690 #if 0
4691 double eff[nseq][nseq];
4692 char pair[njob][njob];
4693 #else
4694 static double *tmplen;
4695 int *intpt;
4696 double tmpdouble;
4697 double eff1, eff0;
4698 static double **eff = NULL;
4699 static char **pair = NULL;
4700 static Achain *ac;
4701 double minscore;
4702 int im = -1, jm = -1;
4703 if( !eff )
4704 {
4705 eff = AllocateFloatMtx( njob, njob );
4706 pair = AllocateCharMtx( njob, njob );
4707 tmplen = AllocateFloatVec( njob );
4708 ac = (Achain *)calloc( njob, sizeof( Achain ) );
4709 }
4710 #endif
4711
4712 for( i=0; i<nseq; i++ )
4713 {
4714 for( j=0; j<nseq; j++ )
4715 {
4716 eff[i][j] = (double)oeff[i][j];
4717 }
4718 }
4719
4720 for( i=0; i<nseq; i++ )
4721 {
4722 ac[i].next = i+1;
4723 ac[i].prev = i-1;
4724 // ac[i].curr = i;
4725 }
4726 ac[nseq-1].next = -1;
4727
4728 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
4729 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
4730 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
4731
4732 reporterr( "\n" );
4733 for( k=0; k<nseq-1; k++ )
4734 {
4735 if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
4736
4737 minscore = 9999.0;
4738 for( i=0; ac[i].next!=-1; i=ac[i].next )
4739 // for( i=0; i<nseq-1; i++ )
4740 {
4741 for( j=ac[i].next; j!=-1; j=ac[j].next )
4742 // for( j=i+1; j<nseq; j++ )
4743 {
4744 tmpdouble = eff[i][j];
4745 if( tmpdouble < minscore )
4746 {
4747 minscore = tmpdouble;
4748 im = i; jm = j;
4749 }
4750 }
4751 }
4752
4753 // reporterr( "im=%d, jm=%d\n", im, jm );
4754
4755 intpt = topol[k][0];
4756 for( i=0; i<nseq; i++ )
4757 if( pair[im][i] > 0 )
4758 *intpt++ = i;
4759 *intpt = -1;
4760
4761 intpt = topol[k][1];
4762 for( i=0; i<nseq; i++ )
4763 if( pair[jm][i] > 0 )
4764 *intpt++ = i;
4765 *intpt = -1;
4766
4767 minscore /= 2.0;
4768
4769 len[k][0] = (double)minscore - tmplen[im];
4770 len[k][1] = (double)minscore - tmplen[jm];
4771
4772 tmplen[im] = (double)minscore;
4773
4774 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
4775 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
4776
4777 // for( i=0; i<nseq; i++ )
4778 for( i=0; i!=-1; i=ac[i].next )
4779 {
4780 if( i != im && i != jm )
4781 {
4782 if( i < im )
4783 {
4784 miniim = i;
4785 maxiim = im;
4786 minijm = i;
4787 maxijm = jm;
4788 }
4789 else if( i < jm )
4790 {
4791 miniim = im;
4792 maxiim = i;
4793 minijm = i;
4794 maxijm = jm;
4795 }
4796 else
4797 {
4798 miniim = im;
4799 maxiim = i;
4800 minijm = jm;
4801 maxijm = i;
4802 }
4803 eff0 = eff[miniim][maxiim];
4804 eff1 = eff[minijm][maxijm];
4805 eff[miniim][maxiim] =
4806 MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
4807 ( eff0 + eff1 ) * 0.5 * sueff_global;
4808 // eff[minijm][maxijm] = 9999.0;
4809 }
4810 }
4811 ac[ac[jm].prev].next = ac[jm].next;
4812 ac[ac[jm].next].prev = ac[jm].prev;
4813 // eff[im][jm] = 9999.0;
4814 #if 0
4815 reporterr( "STEP-%03d:\n", k+1 );
4816 reporterr( "len0 = %f\n", len[k][0] );
4817 for( i=0; topol[k][0][i]>-1; i++ ) reporterr( " %03d", topol[k][0][i] );
4818 reporterr( "\n" );
4819 reporterr( "len1 = %f\n", len[k][1] );
4820 for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i] );
4821 reporterr( "\n" );
4822 #endif
4823 }
4824 reporterr( "\n" );
4825
4826 // FreeFloatMtx( eff );
4827 // FreeCharMtx( pair );
4828 // FreeFloatVec( tmplen );
4829 // free( ac );
4830 }
supg(int nseq,double ** oeff,int *** topol,double ** len)4831 void supg( int nseq, double **oeff, int ***topol, double **len )
4832 {
4833 int i, j, k, miniim, maxiim, minijm, maxijm;
4834 #if 0
4835 double eff[nseq][nseq];
4836 char pair[njob][njob];
4837 #else
4838 static double *tmplen;
4839 int *intpt;
4840 double **doubleptpt;
4841 double *doublept;
4842 double tmpdouble;
4843 double eff1, eff0;
4844 static double **eff = NULL;
4845 static char **pair = NULL;
4846 if( !eff )
4847 {
4848 eff = AllocateFloatMtx( njob, njob );
4849 pair = AllocateCharMtx( njob, njob );
4850 tmplen = AllocateFloatVec( njob );
4851 }
4852 #endif
4853
4854
4855 for( i=0; i<nseq; i++ )
4856 {
4857 for( j=0; j<nseq; j++ )
4858 {
4859 eff[i][j] = (double)oeff[i][j];
4860 }
4861 }
4862 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
4863 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
4864 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
4865
4866 for( k=0; k<nseq-1; k++ )
4867 {
4868 double minscore = 9999.0;
4869 int im = -1, jm = -1;
4870
4871
4872 doubleptpt = eff;
4873 for( i=0; i<nseq-1; i++ )
4874 {
4875 doublept = *doubleptpt++ + i + 1;
4876 for( j=i+1; j<nseq; j++ )
4877 {
4878 tmpdouble = *doublept++;
4879 if( tmpdouble < minscore )
4880 {
4881 minscore = tmpdouble;
4882 im = i; jm = j;
4883 }
4884 }
4885 }
4886 intpt = topol[k][0];
4887 for( i=0; i<nseq; i++ )
4888 if( pair[im][i] > 0 )
4889 *intpt++ = i;
4890 *intpt = -1;
4891
4892 intpt = topol[k][1];
4893 for( i=0; i<nseq; i++ )
4894 if( pair[jm][i] > 0 )
4895 *intpt++ = i;
4896 *intpt = -1;
4897
4898 len[k][0] = (double)minscore / 2.0 - tmplen[im];
4899 len[k][1] = (double)minscore / 2.0 - tmplen[jm];
4900
4901 tmplen[im] = (double)minscore / 2.0;
4902
4903 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
4904 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
4905
4906 for( i=0; i<nseq; i++ )
4907 {
4908 if( i != im && i != jm )
4909 {
4910 #if 1
4911 if( i < im )
4912 {
4913 miniim = i;
4914 maxiim = im;
4915 minijm = i;
4916 maxijm = jm;
4917 }
4918 else if( i < jm )
4919 {
4920 miniim = im;
4921 maxiim = i;
4922 minijm = i;
4923 maxijm = jm;
4924 }
4925 else
4926 {
4927 miniim = im;
4928 maxiim = i;
4929 minijm = jm;
4930 maxijm = i;
4931 }
4932 #else
4933 miniim = MIN( i, im );
4934 maxiim = MAX( i, im );
4935 minijm = MIN( i, jm );
4936 maxijm = MAX( i, jm );
4937 #endif
4938 #if 1
4939 eff0 = eff[miniim][maxiim];
4940 eff1 = eff[minijm][maxijm];
4941 eff[miniim][maxiim] =
4942 MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
4943 ( eff0 + eff1 ) * 0.5 * sueff_global;
4944 #else
4945 MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - sueff_global ) +
4946 ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * sueff_global;
4947 #endif
4948 eff[minijm][maxijm] = 9999.0;
4949 eff[im][jm] = 9999.0;
4950 }
4951 }
4952 #if DEBUG
4953 printf( "STEP-%03d:\n", k+1 );
4954 printf( "len0 = %f\n", len[k][0] );
4955 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
4956 printf( "\n" );
4957 printf( "len1 = %f\n", len[k][1] );
4958 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
4959 printf( "\n" );
4960 #endif
4961 }
4962 }
4963
spg(int nseq,double ** oeff,int *** topol,double ** len)4964 void spg( int nseq, double **oeff, int ***topol, double **len )
4965 {
4966 int i, j, k;
4967 double tmplen[M];
4968 #if 0
4969 double eff[nseq][nseq];
4970 char pair[njob][njob];
4971 #else
4972 double **eff = NULL;
4973 char **pair = NULL;
4974 if( !eff )
4975 {
4976 eff = AllocateDoubleMtx( njob, njob );
4977 pair = AllocateCharMtx( njob, njob );
4978 }
4979 #endif
4980
4981 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
4982 for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
4983 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
4984 for( i=0; i<nseq; i++ ) pair[i][i] = 1;
4985
4986 for( k=0; k<nseq-1; k++ )
4987 {
4988 double minscore = 9999.0;
4989 int im = -1, jm = -1;
4990 int count;
4991
4992 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
4993 {
4994 if( eff[i][j] < minscore )
4995 {
4996 minscore = eff[i][j];
4997 im = i; jm = j;
4998 }
4999 }
5000 for( i=0, count=0; i<nseq; i++ )
5001 if( pair[im][i] > 0 )
5002 {
5003 topol[k][0][count] = i;
5004 count++;
5005 }
5006 topol[k][0][count] = -1;
5007 for( i=0, count=0; i<nseq; i++ )
5008 if( pair[jm][i] > 0 )
5009 {
5010 topol[k][1][count] = i;
5011 count++;
5012 }
5013 topol[k][1][count] = -1;
5014
5015 len[k][0] = minscore / 2.0 - tmplen[im];
5016 len[k][1] = minscore / 2.0 - tmplen[jm];
5017
5018 tmplen[im] = minscore / 2.0;
5019
5020 for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
5021 for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
5022
5023 for( i=0; i<nseq; i++ )
5024 {
5025 if( i != im && i != jm )
5026 {
5027 eff[MIN(i,im)][MAX(i,im)] =
5028 MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
5029 eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
5030 }
5031 eff[im][jm] = 9999.0;
5032 }
5033 #if DEBUG
5034 printf( "STEP-%03d:\n", k+1 );
5035 printf( "len0 = %f\n", len[k][0] );
5036 for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
5037 printf( "\n" );
5038 printf( "len1 = %f\n", len[k][1] );
5039 for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
5040 printf( "\n" );
5041 #endif
5042 }
5043 }
5044
/*
 * ipower: x raised to the integer power n by binary exponentiation.
 * Each set bit of n multiplies the current square of x into the result.
 * Returns 1 for n == 0.
 */
double ipower( double x, int n )    /* n > 0 */
{
	double result = 1;
	double square = x;
	int bits;

	for( bits=n; bits!=0; bits>>=1 )
	{
		if( bits & 1 ) result *= square;
		square *= square;
	}
	return( result );
}
5057
countnode(int nseq,int *** topol,double ** node)5058 void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
5059 {
5060 int i, j, k, s1, s2;
5061 static double rootnode[M];
5062
5063 if( nseq-2 < 0 )
5064 {
5065 reporterr( "Too few sequence for countnode: nseq = %d\n", nseq );
5066 exit( 1 );
5067 }
5068
5069 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
5070 for( i=0; i<nseq-2; i++ )
5071 {
5072 for( j=0; topol[i][0][j]>-1; j++ )
5073 rootnode[topol[i][0][j]]++;
5074 for( j=0; topol[i][1][j]>-1; j++ )
5075 rootnode[topol[i][1][j]]++;
5076 for( j=0; topol[i][0][j]>-1; j++ )
5077 {
5078 s1 = topol[i][0][j];
5079 for( k=0; topol[i][1][k]>-1; k++ )
5080 {
5081 s2 = topol[i][1][k];
5082 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
5083 }
5084 }
5085 }
5086 for( j=0; topol[nseq-2][0][j]>-1; j++ )
5087 {
5088 s1 = topol[nseq-2][0][j];
5089 for( k=0; topol[nseq-2][1][k]>-1; k++ )
5090 {
5091 s2 = topol[nseq-2][1][k];
5092 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
5093 }
5094 }
5095 }
5096
countnode_int(int nseq,int *** topol,int ** node)5097 void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */
5098 {
5099 int i, j, k, s1, s2;
5100 int rootnode[M];
5101
5102 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
5103 for( i=0; i<nseq-2; i++ )
5104 {
5105 for( j=0; topol[i][0][j]>-1; j++ )
5106 rootnode[topol[i][0][j]]++;
5107 for( j=0; topol[i][1][j]>-1; j++ )
5108 rootnode[topol[i][1][j]]++;
5109 for( j=0; topol[i][0][j]>-1; j++ )
5110 {
5111 s1 = topol[i][0][j];
5112 for( k=0; topol[i][1][k]>-1; k++ )
5113 {
5114 s2 = topol[i][1][k];
5115 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
5116 }
5117 }
5118 }
5119 for( j=0; topol[nseq-2][0][j]>-1; j++ )
5120 {
5121 s1 = topol[nseq-2][0][j];
5122 for( k=0; topol[nseq-2][1][k]>-1; k++ )
5123 {
5124 s2 = topol[nseq-2][1][k];
5125 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
5126 }
5127 }
5128 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5129 node[j][i] = node[i][j];
5130 #if DEBUG
5131 reporterr( "node[][] in countnode_int" );
5132 for( i=0; i<nseq; i++ )
5133 {
5134 for( j=0; j<nseq; j++ )
5135 {
5136 reporterr( "%#3d", node[i][j] );
5137 }
5138 reporterr( "\n" );
5139 }
5140 #endif
5141 }
5142
counteff_simple_double(int nseq,int *** topol,double ** len,double * node)5143 void counteff_simple_double( int nseq, int ***topol, double **len, double *node )
5144 {
5145 int i, j, s1, s2;
5146 double total;
5147 static double rootnode[M];
5148 static double eff[M];
5149
5150 #if DEBUG
5151 for( i=0; i<nseq; i++ ){
5152 reporterr( "len0 = %f\n", len[i][0] );
5153 reporterr( "len1 = %f\n", len[i][1] );
5154 }
5155 #endif
5156 for( i=0; i<nseq; i++ )
5157 {
5158 rootnode[i] = 0.0;
5159 eff[i] = 1.0;
5160 /*
5161 rootnode[i] = 1.0;
5162 */
5163 }
5164 for( i=0; i<nseq-1; i++ )
5165 {
5166 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
5167 {
5168 rootnode[s1] += (double)len[i][0] * eff[s1];
5169 eff[s1] *= 0.5;
5170 /*
5171 rootnode[s1] *= 0.5;
5172 */
5173
5174 }
5175 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
5176 {
5177 rootnode[s2] += (double)len[i][1] * eff[s2];
5178 eff[s2] *= 0.5;
5179 /*
5180 rootnode[s2] *= 0.5;
5181 */
5182
5183 }
5184 }
5185 for( i=0; i<nseq; i++ )
5186 {
5187 #if 1 /* 97.9.29 */
5188 rootnode[i] += GETA3;
5189 #endif
5190 #if 0
5191 reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
5192 #endif
5193 }
5194 #if 1
5195 total = 0.0;
5196 for( i=0; i<nseq; i++ )
5197 {
5198 total += rootnode[i];
5199 }
5200 #else
5201 total = 1.0;
5202 #endif
5203
5204 for( i=0; i<nseq; i++ )
5205 {
5206 node[i] = rootnode[i] / total;
5207 }
5208
5209 #if 0
5210 reporterr( "weight array in counteff_simple\n" );
5211 for( i=0; i<nseq; i++ )
5212 reporterr( "%f\n", node[i] );
5213 printf( "\n" );
5214 exit( 1 );
5215 #endif
5216 }
5217
5218
counteff_simple_double_nostatic(int nseq,int *** topol,double ** len,double * node)5219 void counteff_simple_double_nostatic( int nseq, int ***topol, double **len, double *node )
5220 {
5221 int i, j, s1, s2;
5222 double total;
5223 double *rootnode;
5224 double *eff;
5225
5226 rootnode = AllocateDoubleVec( nseq );
5227 eff = AllocateDoubleVec( nseq );
5228
5229 for( i=0; i<nseq; i++ ) // 2014/06/07, fu no eff wo sakeru.
5230 {
5231 if( len[i][0] < 0.0 )
5232 {
5233 reporterr( "WARNING: negative branch length %f, step %d-0\n", len[i][0], i );
5234 len[i][0] = 0.0;
5235 }
5236 if( len[i][1] < 0.0 )
5237 {
5238 reporterr( "WARNING: negative branch length %f, step %d-1\n", len[i][1], i );
5239 len[i][1] = 0.0;
5240 }
5241 }
5242 #if DEBUG
5243 for( i=0; i<nseq-1; i++ )
5244 {
5245 reporterr( "\nstep %d, group 0\n", i );
5246 for( j=0; topol[i][0][j]!=-1; j++) reporterr( "%3d ", topol[i][0][j] );
5247 reporterr( "\n", i );
5248 reporterr( "step %d, group 1\n", i );
5249 for( j=0; topol[i][1][j]!=-1; j++) reporterr( "%3d ", topol[i][1][j] );
5250 reporterr( "\n", i );
5251 reporterr( "len0 = %f\n", len[i][0] );
5252 reporterr( "len1 = %f\n", len[i][1] );
5253 }
5254 #endif
5255 for( i=0; i<nseq; i++ )
5256 {
5257 rootnode[i] = 0.0;
5258 eff[i] = 1.0;
5259 /*
5260 rootnode[i] = 1.0;
5261 */
5262 }
5263 for( i=0; i<nseq-1; i++ )
5264 {
5265 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
5266 {
5267 rootnode[s1] += (double)len[i][0] * eff[s1];
5268 eff[s1] *= 0.5;
5269 /*
5270 rootnode[s1] *= 0.5;
5271 */
5272
5273 }
5274 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
5275 {
5276 rootnode[s2] += (double)len[i][1] * eff[s2];
5277 eff[s2] *= 0.5;
5278 /*
5279 rootnode[s2] *= 0.5;
5280 */
5281
5282 }
5283 }
5284 for( i=0; i<nseq; i++ )
5285 {
5286 #if 1 /* 97.9.29 */
5287 rootnode[i] += GETA3;
5288 #endif
5289 #if 0
5290 reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
5291 #endif
5292 }
5293 #if 1
5294 total = 0.0;
5295 for( i=0; i<nseq; i++ )
5296 {
5297 total += rootnode[i];
5298 }
5299 #else
5300 total = 1.0;
5301 #endif
5302
5303 for( i=0; i<nseq; i++ )
5304 {
5305 node[i] = rootnode[i] / total;
5306 }
5307
5308 #if 0
5309 reporterr( "weight array in counteff_simple\n" );
5310 for( i=0; i<nseq; i++ )
5311 reporterr( "%f\n", node[i] );
5312 printf( "\n" );
5313 exit( 1 );
5314 #endif
5315 free( rootnode );
5316 free( eff );
5317 }
5318
counteff_simple(int nseq,int *** topol,double ** len,double * node)5319 void counteff_simple( int nseq, int ***topol, double **len, double *node )
5320 {
5321 int i, j, s1, s2;
5322 double total;
5323 #if 0
5324 static double rootnode[M];
5325 static double eff[M];
5326 #else
5327 double *rootnode;
5328 double *eff;
5329 rootnode = AllocateDoubleVec( nseq );
5330 eff = AllocateDoubleVec( nseq );
5331 #endif
5332
5333 #if DEBUG
5334 for( i=0; i<nseq; i++ ){
5335 reporterr( "len0 = %f\n", len[i][0] );
5336 reporterr( "len1 = %f\n", len[i][1] );
5337 }
5338 #endif
5339 for( i=0; i<nseq; i++ )
5340 {
5341 rootnode[i] = 0.0;
5342 eff[i] = 1.0;
5343 /*
5344 rootnode[i] = 1.0;
5345 */
5346 }
5347 for( i=0; i<nseq-1; i++ )
5348 {
5349 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
5350 {
5351 rootnode[s1] += len[i][0] * eff[s1];
5352 eff[s1] *= 0.5;
5353 /*
5354 rootnode[s1] *= 0.5;
5355 */
5356
5357 }
5358 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
5359 {
5360 rootnode[s2] += len[i][1] * eff[s2];
5361 eff[s2] *= 0.5;
5362 /*
5363 rootnode[s2] *= 0.5;
5364 */
5365
5366 }
5367 }
5368 for( i=0; i<nseq; i++ )
5369 {
5370 #if 1 /* 97.9.29 */
5371 rootnode[i] += GETA3;
5372 #endif
5373 #if 0
5374 reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
5375 #endif
5376 }
5377 #if 1
5378 total = 0.0;
5379 for( i=0; i<nseq; i++ )
5380 {
5381 total += rootnode[i];
5382 }
5383 #else
5384 total = 1.0;
5385 #endif
5386
5387 for( i=0; i<nseq; i++ )
5388 {
5389 node[i] = rootnode[i] / total;
5390 }
5391
5392 #if 0
5393 reporterr( "weight array in counteff_simple\n" );
5394 for( i=0; i<nseq; i++ )
5395 reporterr( "%f\n", node[i] );
5396 printf( "\n" );
5397 exit( 1 );
5398 #endif
5399 #if 1
5400 free( rootnode );
5401 free( eff );
5402 #endif
5403 }
5404
5405
counteff(int nseq,int *** topol,double ** len,double ** node)5406 void counteff( int nseq, int ***topol, double **len, double **node )
5407 {
5408 int i, j, k, s1, s2;
5409 double rootnode[M];
5410 double eff[M];
5411
5412 if( mix )
5413 {
5414 switch( weight )
5415 {
5416 case( 2 ):
5417 weight = 3;
5418 break;
5419 case( 3 ):
5420 weight = 2;
5421 break;
5422 default:
5423 ErrorExit( "mix error" );
5424 break;
5425 }
5426 }
5427
5428 if( weight == 2 )
5429 {
5430 for( i=0; i<nseq; i++ ) rootnode[i] = 0;
5431 for( i=0; i<nseq-2; i++ )
5432 {
5433 for( j=0; topol[i][0][j]>-1; j++ )
5434 rootnode[topol[i][0][j]]++;
5435 for( j=0; topol[i][1][j]>-1; j++ )
5436 rootnode[topol[i][1][j]]++;
5437 for( j=0; topol[i][0][j]>-1; j++ )
5438 {
5439 s1 = topol[i][0][j];
5440 for( k=0; topol[i][1][k]>-1; k++ )
5441 {
5442 s2 = topol[i][1][k];
5443 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
5444 }
5445 }
5446 }
5447 for( j=0; topol[nseq-2][0][j]>-1; j++ )
5448 {
5449 s1 = topol[nseq-2][0][j];
5450 for( k=0; topol[nseq-2][1][k]>-1; k++ )
5451 {
5452 s2 = topol[nseq-2][1][k];
5453 node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
5454 }
5455 }
5456 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5457 node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
5458 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5459 node[j][i] = node[i][j];
5460 }
5461
5462 if( weight == 3 )
5463 {
5464 #if DEBUG
5465 for( i=0; i<nseq; i++ ){
5466 reporterr( "len0 = %f\n", len[i][0] );
5467 reporterr( "len1 = %f\n", len[i][1] );
5468 }
5469 #endif
5470 for( i=0; i<nseq; i++ )
5471 {
5472 rootnode[i] = 0.0;
5473 eff[i] = 1.0;
5474 /*
5475 rootnode[i] = 1.0;
5476 */
5477 }
5478 for( i=0; i<nseq-1; i++ )
5479 {
5480 for( j=0; (s1=topol[i][0][j]) > -1; j++ )
5481 {
5482 rootnode[s1] += len[i][0] * eff[s1];
5483 eff[s1] *= 0.5;
5484 /*
5485 rootnode[s1] *= 0.5;
5486 */
5487
5488 }
5489 for( j=0; (s2=topol[i][1][j]) > -1; j++ )
5490 {
5491 rootnode[s2] += len[i][1] * eff[s2];
5492 eff[s2] *= 0.5;
5493 /*
5494 rootnode[s2] *= 0.5;
5495 */
5496
5497 }
5498 }
5499 for( i=0; i<nseq; i++ )
5500 {
5501 #if 1 /* 97.9.29 */
5502 rootnode[i] += GETA3;
5503 #endif
5504 #if DEBUG
5505 reporterr( "rootnode for %d = %f\n", i, rootnode[i] );
5506 #endif
5507 }
5508 for( i=0; i<nseq; i++ )
5509 {
5510 for( j=0; j<nseq; j++ )
5511 if( j != i )
5512 node[i][j] = (double)rootnode[i] * rootnode[j];
5513 else node[i][i] = rootnode[i];
5514 }
5515 }
5516
5517 #if 0
5518 printf( "weight matrix in counteff\n" );
5519 for( i=0; i<nseq; i++ )
5520 {
5521 for( j=0; j<nseq; j++ )
5522 {
5523 printf( "%f ", node[i][j] );
5524 }
5525 printf( "\n" );
5526 }
5527 #endif
5528 }
5529
/*
 * score_calcp: score the first len columns of a pairwise alignment.
 *
 * Columns where both sequences have '-' are skipped.  Every other column
 * contributes amino_dis[ms1][ms2].  When one sequence has a gap in the
 * column, `penalty` and a second amino_dis term are added, and the rest of
 * that gap run is consumed inside the inner while loop.
 *
 * The inner loops advance k past the end of the gap run without checking
 * k against len, so they stop only at the first non-'-' character.
 * NOTE(review): this presumably relies on the strings being NUL terminated
 * at or after len - confirm against callers.
 */
double score_calcp( char *seq1, char *seq2, int len )
{
	int k;
	int ms1, ms2;
	double tmpscore;
	int len2 = len - 2;

	tmpscore = 0.0;
	for( k=0; k<len; k++ )
	{
		ms1 = (int)seq1[k];
		ms2 = (int)seq2[k];
		if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
		tmpscore += (double)amino_dis[ms1][ms2];

		if( ms1 == (int)'-' )
		{
			tmpscore += (double)penalty;
			tmpscore += (double)amino_dis[ms1][ms2];
			/* ms2 keeps the value from the column where the gap started */
			while( (ms1=(int)seq1[++k]) == (int)'-' )
				tmpscore += (double)amino_dis[ms1][ms2];
			/* step back so the for-increment lands on the first non-gap column */
			k--;
			if( k >len2 ) break;
			continue;
		}
		if( ms2 == (int)'-' )
		{
			tmpscore += (double)penalty;
			tmpscore += (double)amino_dis[ms1][ms2];
			/* ms1 keeps the value from the column where the gap started */
			while( (ms2=(int)seq2[++k]) == (int)'-' )
				tmpscore += (double)amino_dis[ms1][ms2];
			k--;
			if( k > len2 ) break;
			continue;
		}
	}
	return( tmpscore );
}
5568
score_calc1(char * seq1,char * seq2)5569 double score_calc1( char *seq1, char *seq2 ) /* method 1 */
5570 {
5571 int k;
5572 double score = 0.0;
5573 int count = 0;
5574 int len = strlen( seq1 );
5575
5576 for( k=0; k<len; k++ )
5577 {
5578 if( seq1[k] != '-' && seq2[k] != '-' )
5579 {
5580 score += (double)amino_dis[(int)seq1[k]][(int)seq2[k]];
5581 count++;
5582 }
5583 }
5584 if( count ) score /= (double)count;
5585 else score = 1.0;
5586 return( score );
5587 }
5588
/*
 * substitution_nid: number of columns in which both sequences carry the
 * same non-gap character.  The column count is taken from strlen( seq1 ).
 */
double substitution_nid( char *seq1, char *seq2 )
{
	int len = strlen( seq1 );
	double identical = 0.0;
	int pos;

	for( pos=0; pos<len; pos++ )
	{
		if( seq1[pos] == '-' || seq2[pos] == '-' ) continue;
		if( seq1[pos] == seq2[pos] ) identical += 1;
	}

	return( identical );
}
5603
substitution_score(char * seq1,char * seq2)5604 double substitution_score( char *seq1, char *seq2 )
5605 {
5606 int k;
5607 double s12;
5608 int len = strlen( seq1 );
5609
5610 s12 = 0.0;
5611 for( k=0; k<len; k++ )
5612 if( seq1[k] != '-' && seq2[k] != '-' )
5613 s12 += amino_dis[(int)seq1[k]][(int)seq2[k]];
5614
5615 // fprintf( stdout, "s12 = %f\n", s12 );
5616 return( s12 );
5617 }
5618
substitution_hosei(char * seq1,char * seq2)5619 double substitution_hosei( char *seq1, char *seq2 ) /* method 1 */
5620 #if 0
5621 {
5622 int k;
5623 double score = 0.0;
5624 int count = 0;
5625 int len = strlen( seq1 );
5626
5627 for( k=0; k<len; k++ )
5628 {
5629 if( seq1[k] != '-' && seq2[k] != '-' )
5630 {
5631 score += (double)( seq1[k] != seq2[k] );
5632 count++;
5633 }
5634 }
5635 if( count ) score /= (double)count;
5636 else score = 1.0;
5637 if( score < 0.95 ) score = - log( 1.0 - score );
5638 else score = 3.0;
5639 return( score );
5640 }
5641 #else
5642 {
5643 int count = 0;
5644 double score;
5645 int iscore = 0;
5646 char s1, s2;
5647
5648 while( (s1=*seq1++) )
5649 {
5650 s2 = *seq2++;
5651 if( s1 == '-' ) continue;
5652 if( s2 == '-' ) continue;
5653 iscore += ( s1 != s2 );
5654 count++;
5655 }
5656 if( count ) score = (double)iscore / count;
5657 else score = 1.0;
5658 if( score < 0.95 ) score = - log( 1.0 - score );
5659 else score = 3.0;
5660 return( score );
5661 }
5662 #endif
5663
/*
 * substitution: fraction of mismatching columns among the columns in which
 * neither sequence has a gap.  The column count is taken from
 * strlen( seq1 ).  Returns 1.0 when there is no such column.
 */
double substitution( char *seq1, char *seq2 ) /* method 1 */
{
	int len = strlen( seq1 );
	int mismatches = 0;
	int compared = 0;
	int pos;

	for( pos=0; pos<len; pos++ )
	{
		if( seq1[pos] == '-' || seq2[pos] == '-' ) continue;
		if( seq1[pos] != seq2[pos] ) mismatches++;
		compared++;
	}

	if( compared == 0 ) return( 1.0 );
	return( (double)mismatches / (double)compared );
}
5683
5684
treeconstruction(char ** seq,int nseq,int *** topol,double ** len,double ** eff)5685 void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
5686 {
5687 int i, j;
5688
5689 if( weight > 1 )
5690 {
5691 if( utree == 0 )
5692 {
5693 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
5694 {
5695 /*
5696 eff[i][j] = (double)score_calc1( seq[i], seq[j] );
5697 */
5698 eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
5699 /*
5700 reporterr( "%f\n", eff[i][j] );
5701 */
5702 }
5703 /*
5704 reporterr( "distance matrix\n" );
5705 for( i=0; i<nseq; i++ )
5706 {
5707 for( j=0; j<nseq; j++ )
5708 {
5709 reporterr( "%f ", eff[i][j] );
5710 }
5711 reporterr( "\n" );
5712 }
5713 */
5714 /*
5715 upg( nseq, eff, topol, len );
5716 upg2( nseq, eff, topol, len );
5717 */
5718 spg( nseq, eff, topol, len );
5719 counteff( nseq, topol, len, eff );
5720 }
5721 }
5722 else
5723 {
5724 for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
5725 eff[i][j] = 1.0;
5726 }
5727 /*
5728 reporterr( "weight matrix\n" );
5729 for( i=0; i<nseq; i++ )
5730 {
5731 for( j=0; j<nseq; j++ )
5732 {
5733 reporterr( "%f ", eff[i][j] );
5734 }
5735 reporterr( "\n" );
5736 }
5737 */
5738 }
5739
/*
 * bscore_calc: weighted sum-of-pairs score over the first s sequences.
 *
 * For every pair (i,j) and every column k:
 *   gb1/gb2  gap state of seq i / seq j in the previous column (0 at k == 0)
 *   gc1/gc2  gap state of seq i / seq j in the current column
 *   cob      gap-transition term built from those four flags, see below
 * The pair weight eff[i][j] scales both the gap term and the amino_dis term.
 *
 * score is a long, so every "+=" below truncates the floating-point product
 * when it is stored.
 * The return value divides by nglen, the number of (pair, column)
 * combinations in which neither sequence has a gap; nothing guards against
 * nglen == 0.
 */
double bscore_calc( char **seq, int s, double **eff )  /* algorithm B */
{
	int i, j, k;
	int gb1, gb2, gc1, gc2;
	int cob;
	int nglen;
	int len = strlen( seq[0] );
	long score;

	score = 0;
	nglen = 0;
	for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
	{
		double efficient = eff[i][j];

		gc1 = 0;
		gc2 = 0;
		for( k=0; k<len; k++ )
		{
			gb1 = gc1;
			gb2 = gc2;

			gc1 = ( seq[i][k] == '-' );
			gc2 = ( seq[j][k] == '-' );

			/*
			 * Terms, in order (at most one is non-zero for a given column):
			 *   i opens a gap,  j ungapped in both columns
			 *   j opens a gap,  i ungapped in both columns
			 *   i opens a gap while j closes one
			 *   i closes a gap while j opens one
			 *   i closes a gap while j stays gapped   (times BEFF)
			 *   j closes a gap while i stays gapped   (times BEFF)
			 */
			cob =
			       !gb1  *  gc1
			     * !gb2  * !gc2

			     + !gb1  * !gc1
			     * !gb2  *  gc2

			     + !gb1  *  gc1
			     *  gb2  * !gc2

			     +  gb1  * !gc1
			     * !gb2  *  gc2

			     +  gb1  * !gc1
			     *  gb2  *  gc2      *BEFF

			     +  gb1  *  gc1
			     *  gb2  * !gc2      *BEFF
			     ;
			score += (long)cob * penalty * efficient;
			score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient;
			nglen += ( !gc1 * !gc2 );
		}
	}
	return( (double)score / nglen + 400.0 * !scoremtx );
}
5791
AllocateTmpSeqs(char *** mseq2pt,char ** mseq1pt,int locnlenmax)5792 void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
5793 {
5794 *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
5795 *mseq1pt = AllocateCharVec( locnlenmax+1 );
5796 }
5797
/*
 * FreeTmpSeqs: release the buffers obtained from AllocateTmpSeqs:
 * the matrix through FreeCharMtx, the vector through free.
 */
void FreeTmpSeqs( char **mseq2, char *mseq1 )
{
	FreeCharMtx( mseq2 );
	free( mseq1 );
}
5803
5804
/*
 * gappick0: copy seq into aseq with every '-' removed and terminate the
 * result with NUL.
 */
void gappick0( char *aseq, char *seq )
{
	int rd, wr;

	wr = 0;
	for( rd=0; seq[rd] != 0; rd++ )
	{
		if( seq[rd] == '-' ) continue;
		aseq[wr] = seq[rd];
		wr++;
	}
	aseq[wr] = 0;
}
5815
/*
 * isallgap: return 1 when seq consists only of '-' characters (an empty
 * string also yields 1), otherwise 0.
 */
int isallgap( char *seq )
{
	const char *p = seq;

	/* skip the leading run of gaps; all-gap means we reached the terminator */
	while( *p == '-' ) p++;
	return( *p == 0 );
}
5825
/*
 * gappick: copy all sequences except number s from aseq into mseq2,
 * dropping the columns in which every one of those sequences has a gap,
 * and collect the weights eff[s][i] of those sequences into effarr.
 *
 * The first nseq-1 rows of mseq2 are NUL terminated after the last kept
 * column.  The column count is taken from strlen( aseq[0] ).
 */
void gappick( int nseq, int s, char **aseq, char **mseq2,
			  double **eff, double *effarr )
{
	int col, row, kept, out;
	int len = strlen( aseq[0] );

	kept = 0;
	for( col=0; col<len; col++ )
	{
		/* look for any residue among the sequences other than s */
		for( row=0; row<nseq; row++ )
			if( row != s && aseq[row][col] != '-' ) break;
		if( row == nseq ) continue;

		out = 0;
		for( row=0; row<nseq; row++ )
		{
			if( row == s ) continue;
			mseq2[out][kept] = aseq[row][col];
			out++;
		}
		kept++;
	}
	for( row=0; row<nseq-1; row++ ) mseq2[row][kept] = 0;

	out = 0;
	for( row=0; row<nseq; row++ )
	{
		if( row == s ) continue;
		effarr[out] = eff[s][row];
		out++;
	}
}
5865
/*
 * commongappick_record: remove, in place, every column in which all nseq
 * sequences have a gap, and record in map[new] the original index of each
 * kept column.
 *
 * The loop runs up to and including index strlen( seq[0] ), so the
 * terminating NUL is treated as a kept column: it is moved along with the
 * residues and also gets a map entry.
 */
void commongappick_record( int nseq, char **seq, int *map )
{
	int len = strlen( seq[0] );
	int col, row, kept;

	kept = 0;
	for( col=0; col<=len; col++ )
	{
		/* find the first sequence that is not gapped in this column */
		row = 0;
		while( row < nseq && seq[row][col] == '-' ) row++;
		if( row == nseq ) continue;

		for( row=0; row<nseq; row++ )
			seq[row][kept] = seq[row][col];
		map[kept] = col;
		kept++;
	}
}
5893
5894
/*
 * commongappick: remove, in place, every column in which all nseq sequences
 * have a gap.
 *
 * The scan includes index strlen( seq[0] ), so the terminating NUL counts
 * as a kept column and is moved together with the residues.
 *
 * Normal path: the indices of the kept columns are collected first, then
 * each sequence is compacted row by row.  If the index table cannot be
 * allocated, the function falls back to compacting column by column, which
 * needs no extra memory and produces the same result (a write never goes
 * to a position beyond the column currently being read).
 */
void commongappick( int nseq, char **seq )
{
	int i, j, count;
	int len = strlen( seq[0] );
	int *mapfromnewtoold;

	mapfromnewtoold = calloc( len+1, sizeof( int ) );
	if( mapfromnewtoold == NULL )
	{
		/* allocation failed: allocation-free column-wise compaction */
		for( i=0, count=0; i<=len; i++ )
		{
			for( j=0; j<nseq; j++ )
				if( seq[j][i] != '-' ) break;
			if( j == nseq ) continue;
			for( j=0; j<nseq; j++ )
				seq[j][count] = seq[j][i];
			count++;
		}
		return;
	}

	/* pass 1: remember which columns survive */
	for( i=0, count=0; i<=len; i++ )
	{
		for( j=0; j<nseq; j++ )
			if( seq[j][i] != '-' ) break;
		if( j != nseq )
		{
			mapfromnewtoold[count++] = i;
		}
	}
	/* pass 2: compact each sequence; mapfromnewtoold[i] >= i, so reads stay ahead of writes */
	for( j=0; j<nseq; j++ )
	{
		for( i=0; i<count; i++ )
		{
			seq[j][i] = seq[j][mapfromnewtoold[i]];
		}
	}
	free( mapfromnewtoold );
}
5975
/*
 * Disabled code (kept for reference, not compiled).
 * commongaprecord marks each of the first strlen( seq[0] ) columns in
 * originallygapped: '-' when all nseq sequences have a gap in that column,
 * 'o' otherwise, and NUL terminates the result.
 */
#if 0
void commongaprecord( int nseq, char **seq, char *originallygapped )
{
	int i, j;
	int len = strlen( seq[0] );

	for( i=0; i<len; i++ )
	{
		for( j=0; j<nseq; j++ )
			if( seq[j][i] != '-' ) break;
		if( j == nseq )
			originallygapped[i] = '-';
		else
			originallygapped[i] = 'o';
	}
	originallygapped[len] = 0;
}
#endif
5994
score_calc0(char ** seq,int s,double ** eff,int ex)5995 double score_calc0( char **seq, int s, double **eff, int ex )
5996 {
5997 double tmp;
5998
5999 if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
6000 if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
6001 else tmp = score_calc5( seq, s, eff, ex );
6002
6003 return( tmp );
6004
6005 }
6006
6007 /*
6008 double score_m_1( char **seq, int ex, double **eff )
6009 {
6010 int i, j, k;
6011 int len = strlen( seq[0] );
6012 int gb1, gb2, gc1, gc2;
6013 int cob;
6014 int nglen;
6015 double score;
6016
6017 score = 0.0;
6018 nglen = 0;
6019 for( i=0; i<njob; i++ )
6020 {
6021 double efficient = eff[MIN(i,ex)][MAX(i,ex)];
6022 if( i == ex ) continue;
6023
6024 gc1 = 0;
6025 gc2 = 0;
6026 for( k=0; k<len; k++ )
6027 {
6028 gb1 = gc1;
6029 gb2 = gc2;
6030
6031 gc1 = ( seq[i][k] == '-' );
6032 gc2 = ( seq[ex][k] == '-' );
6033
6034 cob =
6035 !gb1 * gc1
6036 * !gb2 * !gc2
6037
6038 + !gb1 * !gc1
6039 * !gb2 * gc2
6040
6041 + !gb1 * gc1
6042 * gb2 * !gc2
6043
6044 + gb1 * !gc1
6045 * !gb2 * gc2
6046
6047 + gb1 * !gc1
6048 * gb2 * gc2 *BEFF
6049
6050 + gb1 * gc1
6051 * gb2 * !gc2 *BEFF
6052 ;
6053 score += (double)cob * penalty * efficient;
6054 score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
6055 *
6056 nglen += ( !gc1 * !gc2 );
6057 *
6058 if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
6059 }
6060 }
6061 return( (double)score / nglen + 400.0 * !scoremtx );
6062 }
6063 */
6064
/*
 * Disabled code (kept for reference, not compiled).
 * sitescore writes three per-column score strings for the alignment:
 *   sco1  unweighted mean of amino_dis (+ 400 * !scoremtx) over all pairs
 *   sco2  eff-weighted mean, shifted and doubled, clamped at 0
 *   sco3  sliding-window average of the sco2 values over 2*WIN+1 columns,
 *         forced to 0 when any sequence has a gap in the column; the first
 *         and last WIN columns are filled with '-'
 * Each value is encoded as a letter by adding (int)( value/100 ) to 0x61.
 */
#if 0
void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
{
	int i, j, k;
	int len = strlen( seq[0] );
	double tmp;
	double count;
	int ch;
	double sco[N];

	for( i=0; i<len; i++ )
	{
		tmp = 0.0; count = 0;
		for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
		{
		/*
			if( seq[j][i] != '-' && seq[k][i] != '-' )
		*/
			{
				tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
				count++;
			}
		}
		if( count > 0.0 ) tmp /= count;
		else( tmp = 0.0 );
		ch = (int)( tmp/100.0 - 0.000001 );
		sprintf( sco1+i, "%c", ch+0x61 );
	}
	sco1[len] = 0;

	for( i=0; i<len; i++ )
	{
		tmp = 0.0; count = 0;
		for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
		{
		/*
			if( seq[j][i] != '-' && seq[k][i] != '-' )
		*/
			{
				tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
				count += eff[j][k];
			}
		}
		if( count > 0.0 ) tmp /= count;
		else( tmp = 0.0 );
		tmp = ( tmp - 400 * !scoremtx ) * 2;
		if( tmp < 0 ) tmp = 0;
		ch = (int)( tmp/100.0 - 0.000001 );
		sprintf( sco2+i, "%c", ch+0x61 );
		sco[i] = tmp;
	}
	sco2[len] = 0;

	for( i=WIN; i<len-WIN; i++ )
	{
		tmp = 0.0;
		for( j=i-WIN; j<=i+WIN; j++ )
		{
			tmp += sco[j];
		}
		for( j=0; j<njob; j++ )
		{
			if( seq[j][i] == '-' )
			{
				tmp = 0.0;
				break;
			}
		}
		tmp /= WIN * 2 + 1;
		ch = (int)( tmp/100.0 - 0.0000001 );
		sprintf( sco3+i, "%c", ch+0x61 );
	}
	for( i=0; i<WIN; i++ ) sco3[i] = '-';
	for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
	sco3[len] = 0;
}
#endif
6142
/*
 * strins: insert str1 in front of str2, in place.
 *
 * On return str2 holds the concatenation str1 + (old str2), NUL terminated.
 * The buffer behind str2 must have room for
 * strlen( str1 ) + strlen( str2 ) + 1 bytes; str1 is not modified and must
 * not overlap that buffer.
 *
 * Implemented with memmove/memcpy.  The earlier hand-written backward copy
 * produced the same bytes but ended its loops by decrementing pointers to
 * one element before the start of both buffers, which is undefined.
 */
void strins( char *str1, char *str2 )
{
	size_t len1 = strlen( str1 );
	size_t len2 = strlen( str2 );

	/* shift the old contents, including the terminator, to make room */
	memmove( str2 + len1, str2, len2 + 1 );
	memcpy( str2, str1, len1 );
}
6156
/*
 * isaligned: return 1 when all nseq sequences have the same length as
 * seq[0], otherwise 0.
 */
int isaligned( int nseq, char **seq )
{
	int reflen = strlen( seq[0] );
	int idx = 1;

	while( idx < nseq )
	{
		if( strlen( seq[idx] ) != reflen ) return( 0 );
		idx++;
	}
	return( 1 );
}
6167
/*
 * score_calc_for_score: average pairwise alignment score of nseq sequences.
 *
 * For every pair, columns where both sequences have a gap are skipped.
 * Every other column adds amino_dis for the two characters and increments
 * the column counter c.  When one sequence has a gap, penalty - n_dis[0][24]
 * is added once and the remainder of that gap run is skipped.
 * Each pair contributes tmpscore / c; nothing guards against c == 0.
 * The sum is divided by the number of pairs, nseq*(nseq-1)/2, and 400.0 is
 * added.  The raw sum is also written through reporterr.
 *
 * The inner while loops advance k without checking it against len, so they
 * stop only at the first non-'-' character.
 * NOTE(review): presumably relies on the NUL terminator - confirm.
 */
double score_calc_for_score( int nseq, char **seq )
{
	int i, j, k, c;
	int len = strlen( seq[0] );
	double score;
	double tmpscore;
	char *mseq1, *mseq2;

	score = 0.0;
	for( i=0; i<nseq-1; i++ )
	{
		for( j=i+1; j<nseq; j++ )
		{
			mseq1 = seq[i];
			mseq2 = seq[j];
			tmpscore = 0.0;
			c = 0;
			for( k=0; k<len; k++ )
			{
				if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
				tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
				c++;
				if( mseq1[k] == '-' )
				{
					tmpscore += penalty - n_dis[0][24];
					/* skip the rest of the gap run in mseq1 */
					while( mseq1[++k] == '-' )
						;
					/* step back so the for-increment lands on the first non-gap column */
					k--;
					if( k > len-2 ) break;
					continue;
				}
				if( mseq2[k] == '-' )
				{
					tmpscore += penalty - n_dis[0][24];
					/* skip the rest of the gap run in mseq2 */
					while( mseq2[++k] == '-' )
						;
					k--;
					if( k > len-2 ) break;
					continue;
				}
			}
			score += (double)tmpscore / (double)c;
#if DEBUG
			printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
			printf( "tmpscore / c          = %f\n", tmpscore/(double)c );
#endif
		}
	}
	reporterr( "raw score = %f\n", score );
	score /= (double)nseq * ( nseq-1.0 ) / 2.0;
	score += 400.0;
#if DEBUG
	printf( "score in mltaln9.c = %f\n", score );
#endif
	return( (double)score );
}
6224
/*
 * doublencpy: copy len doubles from vec2 to vec1, front to back.
 * Nothing is copied when len is 0.
 */
void doublencpy( double *vec1, double *vec2, int len )
{
	double *dst = vec1;
	double *src = vec2;
	int remaining;

	for( remaining=len; remaining!=0; remaining-- )
	{
		*dst = *src;
		dst++;
		src++;
	}
}
6230
/*
 * score_calc_a: weighted sum-of-pairs score over the first s sequences.
 *
 * For every pair (i,j) and every column k:
 *   gb1/gb2  gap state of seq i / seq j in the previous column (0 at k == 0)
 *   gc1/gc2  gap state of seq i / seq j in the current column
 *   cob      1 when one of the ten listed gap-state transitions occurs,
 *            0 otherwise (each term is a distinct combination of the four
 *            flags, so at most one term is non-zero)
 * Each column adds 0.5 * cob * penalty * eff[i][j] plus the weighted
 * amino_dis value.
 *
 * The result is divided by nglen, the number of (pair, column) combinations
 * without a gap in either sequence; nothing guards against nglen == 0.
 */
double score_calc_a( char **seq, int s, double **eff )  /* algorithm A+ */
{
	int i, j, k;
	int gb1, gb2, gc1, gc2;
	int cob;
	int nglen;
	int len = strlen( seq[0] );
	double score;

	score = 0;
	nglen = 0;
	for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
	{
		double efficient = eff[i][j];

		gc1 = 0;
		gc2 = 0;
		for( k=0; k<len; k++ )
		{
			gb1 = gc1;
			gb2 = gc2;

			gc1 = ( seq[i][k] == '-' );
			gc2 = ( seq[j][k] == '-' );

			/*
			 * Terms, in order:
			 *   i opens,  j ungapped       i closes, j ungapped
			 *   j opens,  i ungapped       j closes, i ungapped
			 *   i opens while j closes     i closes while j opens
			 *   i closes, j stays gapped   j closes, i stays gapped
			 *   i opens,  j stays gapped   j opens,  i stays gapped
			 * Both opening or both closing in the same column is not listed.
			 */
			cob =
			       !gb1  *  gc1
			     * !gb2  * !gc2

			     +  gb1  * !gc1
			     * !gb2  * !gc2

			     + !gb1  * !gc1
			     * !gb2  *  gc2

			     + !gb1  * !gc1
			     *  gb2  * !gc2

			     + !gb1  *  gc1
			     *  gb2  * !gc2

			     +  gb1  * !gc1
			     * !gb2  *  gc2

			     +  gb1  * !gc1
			     *  gb2  *  gc2

			     +  gb1  *  gc1
			     *  gb2  * !gc2

			     + !gb1  *  gc1
			     *  gb2  *  gc2

			     +  gb1  *  gc1
			     * !gb2  *  gc2
			     ;
			score += 0.5 * (double)cob * penalty * efficient;
			score += (double)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (double)efficient;
			nglen += ( !gc1 * !gc2 );
		}
	}
	return( (double)score / nglen + 400.0 * !scoremtx );
}
6294
6295
/*
 * score_calc_s: weighted sum-of-pairs score over the first s sequences.
 *
 * Same structure as score_calc_a, but only six gap-state transitions count
 * towards cob; the four "one sequence changes while the other stays gapped"
 * terms are disabled with #if 0.  The return value adds a fixed 400.0.
 *
 *   gb1/gb2  gap state of seq i / seq j in the previous column (0 at k == 0)
 *   gc1/gc2  gap state of seq i / seq j in the current column
 *
 * The result is divided by nglen, the number of (pair, column) combinations
 * without a gap in either sequence; nothing guards against nglen == 0.
 */
double score_calc_s( char **seq, int s, double **eff )  /* algorithm S, not used */
{
	int i, j, k;
	int gb1, gb2, gc1, gc2;
	int cob;
	int nglen;
	int len = strlen( seq[0] );
	double score;

	score = 0;
	nglen = 0;
	for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
	{
		double efficient = eff[i][j];

		gc1 = 0;
		gc2 = 0;
		for( k=0; k<len; k++ )
		{
			gb1 = gc1;
			gb2 = gc2;

			gc1 = ( seq[i][k] == '-' );
			gc2 = ( seq[j][k] == '-' );

			/*
			 * Active terms, in order:
			 *   i opens,  j ungapped       i closes, j ungapped
			 *   j opens,  i ungapped       j closes, i ungapped
			 *   i opens while j closes     i closes while j opens
			 */
			cob =
			       !gb1  *  gc1
			     * !gb2  * !gc2

			     +  gb1  * !gc1
			     * !gb2  * !gc2

			     + !gb1  * !gc1
			     * !gb2  *  gc2

			     + !gb1  * !gc1
			     *  gb2  * !gc2

			     + !gb1  *  gc1
			     *  gb2  * !gc2

			     +  gb1  * !gc1
			     * !gb2  *  gc2

#if 0
			     +  gb1  * !gc1
			     *  gb2  *  gc2

			     +  gb1  *  gc1
			     *  gb2  * !gc2

			     + !gb1  *  gc1
			     *  gb2  *  gc2

			     +  gb1  *  gc1
			     * !gb2  *  gc2
#endif
			     ;
			score += 0.5 * (double)cob * penalty * efficient;
			score += (double)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (double)efficient;
			nglen += ( !gc1 * !gc2 );
		}
	}
	return( (double)score / nglen + 400.0 );
}
6361
/*
 * score_calc_for_score_s: unweighted sum-of-pairs score over the first s
 * sequences.
 *
 * Uses the same six gap-state transitions as score_calc_s but applies no
 * pair weights.  Each column adds 0.5 * cob * penalty plus the amino_dis
 * value of the two characters.
 *
 *   gb1/gb2  gap state of seq i / seq j in the previous column (0 at k == 0)
 *   gc1/gc2  gap state of seq i / seq j in the current column
 *
 * The result is divided by nglen, the number of (pair, column) combinations
 * without a gap in either sequence, and 400.0 is added; nothing guards
 * against nglen == 0.
 */
double score_calc_for_score_s( int s, char **seq )  /* algorithm S */
{
	int i, j, k;
	int gb1, gb2, gc1, gc2;
	int cob;
	int nglen;
	int len = strlen( seq[0] );
	double score;

	score = 0;
	nglen = 0;
	for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
	{

		gc1 = 0;
		gc2 = 0;
		for( k=0; k<len; k++ )
		{
			gb1 = gc1;
			gb2 = gc2;

			gc1 = ( seq[i][k] == '-' );
			gc2 = ( seq[j][k] == '-' );

			/*
			 * Active terms, in order:
			 *   i opens,  j ungapped       i closes, j ungapped
			 *   j opens,  i ungapped       j closes, i ungapped
			 *   i opens while j closes     i closes while j opens
			 */
			cob =
			       !gb1  *  gc1
			     * !gb2  * !gc2

			     +  gb1  * !gc1
			     * !gb2  * !gc2

			     + !gb1  * !gc1
			     * !gb2  *  gc2

			     + !gb1  * !gc1
			     *  gb2  * !gc2

			     + !gb1  *  gc1
			     *  gb2  * !gc2

			     +  gb1  * !gc1
			     * !gb2  *  gc2

#if 0
			     +  gb1  * !gc1
			     *  gb2  *  gc2

			     +  gb1  *  gc1
			     *  gb2  * !gc2

			     + !gb1  *  gc1
			     *  gb2  *  gc2

			     +  gb1  *  gc1
			     * !gb2  *  gc2
#endif
			     ;
			score += 0.5 * (double)cob * penalty;
			score += (double)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
			nglen += ( !gc1 * !gc2 );
		}
#if 0
		reporterr( "i = %d, j=%d\n", i+1, j+1 );
		reporterr( "score = %f\n", score );
#endif
	}
	return( (double)score / nglen + 400.0 );
}
6430
/*
 * SSPscore___: unweighted score of sequence ex against each of the other
 * sequences among the first s.
 *
 * Same column scoring as SSPscore, except that the two "one sequence opens
 * while the other closes" terms are multiplied by 2.0.  That makes the cob
 * expression a double, which is truncated when stored in the int cob, so
 * cob can be 0, 1 or 2.
 *
 *   gb1/gb2  gap state of seq ex / seq j in the previous column (0 at k == 0)
 *   gc1/gc2  gap state of seq ex / seq j in the current column
 *
 * nglen is accumulated but does not enter the return value.
 */
double SSPscore___( int s, char **seq, int ex )  /* algorithm S */
{
	int i, j, k;
	int gb1, gb2, gc1, gc2;
	int cob;
	int nglen;
	int len = strlen( seq[0] );
	double score;

	score = 0;
	nglen = 0;
	/* i is fixed to ex; only j iterates */
	i=ex; for( j=0; j<s; j++ )
	{

		if( j == ex ) continue;

		gc1 = 0;
		gc2 = 0;
		for( k=0; k<len; k++ )
		{
			gb1 = gc1;
			gb2 = gc2;

			gc1 = ( seq[i][k] == '-' );
			gc2 = ( seq[j][k] == '-' );

			/*
			 * Active terms, in order:
			 *   i opens,  j ungapped       i closes, j ungapped
			 *   j opens,  i ungapped       j closes, i ungapped
			 *   i opens while j closes (x 2.0)
			 *   i closes while j opens (x 2.0)
			 */
			cob =
			       !gb1  *  gc1
			     * !gb2  * !gc2

			     +  gb1  * !gc1
			     * !gb2  * !gc2

			     + !gb1  * !gc1
			     * !gb2  *  gc2

			     + !gb1  * !gc1
			     *  gb2  * !gc2

			     + !gb1  *  gc1
			     *  gb2  * !gc2 * 2.0

			     +  gb1  * !gc1
			     * !gb2  *  gc2 * 2.0

#if 0
			     +  gb1  * !gc1
			     *  gb2  *  gc2

			     +  gb1  *  gc1
			     *  gb2  * !gc2

			     + !gb1  *  gc1
			     *  gb2  *  gc2

			     +  gb1  *  gc1
			     * !gb2  *  gc2
#endif
			     ;
			score += 0.5 * (double)cob * penalty;
			score += (double)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
			nglen += ( !gc1 * !gc2 ); /* not used */
		}
#if 0
		reporterr( "i = %d, j=%d\n", i+1, j+1 );
		reporterr( "score = %f\n", score );
#endif
	}
	return( (double)score );
}
6501
SSPscore(int s,char ** seq)6502 double SSPscore( int s, char **seq ) /* algorithm S */
6503 {
6504 int i, j, k;
6505 int gb1, gb2, gc1, gc2;
6506 int cob;
6507 int nglen;
6508 int len = strlen( seq[0] );
6509 double score;
6510
6511 score = 0;
6512 nglen = 0;
6513 for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
6514 {
6515
6516 gc1 = 0;
6517 gc2 = 0;
6518 for( k=0; k<len; k++ )
6519 {
6520 gb1 = gc1;
6521 gb2 = gc2;
6522
6523 gc1 = ( seq[i][k] == '-' );
6524 gc2 = ( seq[j][k] == '-' );
6525
6526 cob =
6527 !gb1 * gc1
6528 * !gb2 * !gc2
6529
6530 + gb1 * !gc1
6531 * !gb2 * !gc2
6532
6533 + !gb1 * !gc1
6534 * !gb2 * gc2
6535
6536 + !gb1 * !gc1
6537 * gb2 * !gc2
6538
6539 + !gb1 * gc1
6540 * gb2 * !gc2
6541
6542 + gb1 * !gc1
6543 * !gb2 * gc2
6544
6545 #if 0
6546 + gb1 * !gc1
6547 * gb2 * gc2
6548
6549 + gb1 * gc1
6550 * gb2 * !gc2
6551
6552 + !gb1 * gc1
6553 * gb2 * gc2
6554
6555 + gb1 * gc1
6556 * !gb2 * gc2
6557 #endif
6558 ;
6559 score += 0.5 * (double)cob * penalty;
6560 score += (double)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
6561 nglen += ( !gc1 * !gc2 ); /* tsukawanai */
6562 }
6563 #if 0
6564 reporterr( "i = %d, j=%d\n", i+1, j+1 );
6565 reporterr( "score = %f\n", score );
6566 #endif
6567 }
6568 return( (double)score );
6569 }
6570
6571
6572
DSPscore(int s,char ** seq)6573 double DSPscore( int s, char **seq ) /* method 3 deha nai */
6574 {
6575 int i, j, k;
6576 double c;
6577 int len = strlen( seq[0] );
6578 double score;
6579 double tmpscore;
6580 char *mseq1, *mseq2;
6581 #if DEBUG
6582 FILE *fp;
6583 #endif
6584
6585 score = 0.0;
6586 c = 0.0;
6587
6588 for( i=0; i<s-1; i++ )
6589 {
6590 for( j=i+1; j<s; j++ )
6591 {
6592 mseq1 = seq[i];
6593 mseq2 = seq[j];
6594 tmpscore = 0.0;
6595 for( k=0; k<len; k++ )
6596 {
6597 if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
6598 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
6599
6600 if( mseq1[k] == '-' )
6601 {
6602 tmpscore += penalty;
6603 while( mseq1[++k] == '-' )
6604 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
6605 k--;
6606 if( k > len-2 ) break;
6607 continue;
6608 }
6609 if( mseq2[k] == '-' )
6610 {
6611 tmpscore += penalty;
6612 while( mseq2[++k] == '-' )
6613 tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
6614 k--;
6615 if( k > len-2 ) break;
6616 continue;
6617 }
6618 }
6619 score += (double)tmpscore;
6620 }
6621 }
6622
6623 return( score );
6624 }
6625
6626
6627 #define SEGMENTSIZE 150
6628
/*
 * Scans an alignment for well-conserved windows and records them as segments.
 *
 *   nseq - number of aligned sequences in seq
 *   seq  - aligned sequences; the column count is strlen( seq[0] )
 *   seg  - output array of Segment records, filled from seg[0] onwards
 *
 * For every column the n_dis score averaged over all sequence pairs is
 * computed.  A window of divWinSize columns is then slid along the alignment;
 * consecutive window positions whose summed score exceeds `threshold` form a
 * segment (start, end, center, cumulative score).  A run longer than
 * SEGMENTSIZE positions is cut, and the cut is marked with skipForeward on
 * the segment and skipBackward on its successor.
 *
 * Returns the number of segments written.  Calls ErrorExit when more than
 * MAXSEG-3 segments have been closed inside the scan loop.
 *
 * The per-column buffer is static and only grows; `threshold` is computed once,
 * on the first call that allocates the buffer.
 */
int searchAnchors( int nseq, char **seq, Segment *seg )
{
	int i, j, k, kcyc;
	int status;
	double score;
	int value = 0;
	int len;
	int length;
	int firstwin;
	static double *stra = NULL;
	static int alloclen = 0;
	double cumscore;
	static double threshold;

	len = strlen( seq[0] );
	if( alloclen < len )
	{
		if( alloclen )
		{
			FreeDoubleVec( stra );
		}
		else
		{
			threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
		}
		stra = AllocateDoubleVec( len );
		alloclen = len;
	}

	/* Mean pairwise n_dis score of every column. */
	for( i=0; i<len; i++ )
	{
		stra[i] = 0.0;
		kcyc = nseq-1;
		for( k=0; k<kcyc; k++ ) for( j=k+1; j<nseq; j++ )
			stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]];
		stra[i] /= (double)nseq * ( nseq-1 ) / 2;
	}

	(seg+0)->skipForeward = 0;
	(seg+1)->skipBackward = 0;
	status = 0;
	cumscore = 0.0;
	score = 0.0;
	length = 0; /* modified at 01/09/11 */

	/*
	 * Score of the first window.  stra holds only `len` values (and is still
	 * NULL if no non-empty alignment has been seen yet), so the window is
	 * clamped to the alignment length; when the alignment is shorter than the
	 * window the sliding loop below does not run and 0 is returned.
	 */
	firstwin = ( divWinSize < len ) ? divWinSize : len;
	for( j=0; j<firstwin; j++ ) score += stra[j];

	for( i=1; i<len-divWinSize; i++ )
	{
		/* Slide the window one column to the right. */
		score = score - stra[i-1] + stra[i+divWinSize-1];
#if DEBUG
		reporterr( "%d %f   ? %f", i, score, threshold );
		if( score > threshold ) reporterr( "YES\n" );
		else                    reporterr( "NO\n" );
#endif

		if( score > threshold )
		{
			if( !status )
			{
				/* A new segment opens here. */
				status = 1;
				seg->start = i;
				length = 0;
				cumscore = 0.0;
			}
			length++;
			cumscore += score;
		}
		if( score <= threshold || length > SEGMENTSIZE )
		{
			if( status )
			{
				/* Close the current segment. */
				seg->end = i;
				seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
				seg->score = cumscore;
#if DEBUG
				reporterr( "%d-%d length = %d\n", seg->start, seg->end, length );
#endif
				if( length > SEGMENTSIZE )
				{
					/* Closed only because it grew too long: mark the cut. */
					(seg+0)->skipForeward = 1;
					(seg+1)->skipBackward = 1;
				}
				else
				{
					(seg+0)->skipForeward = 0;
					(seg+1)->skipBackward = 0;
				}
				length = 0;
				cumscore = 0.0;
				status = 0;
				value++;
				seg++;
				if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
			}
		}
	}
	if( status )
	{
		/* A segment was still open when the scan reached the end. */
		seg->end = i;
		seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
		seg->score = cumscore;
#if DEBUG
		reporterr( "%d-%d length = %d\n", seg->start, seg->end, length );
#endif
		value++;
	}
	return( value );
}
6735
/*
 * Assigns a simple importance to every local-homology record without any
 * position-specific weighting: importance = opt / overlapaa, and fimportance
 * is set to the same value.
 *
 *   nseq     - number of sequences; localhom is walked as an nseq x nseq table
 *   eff      - unused
 *   seq      - unused (an alternative normalisation by the shorter ungapped
 *              sequence length used to be selectable at compile time; the
 *              per-call length table it needed was never read in the active
 *              code path and is no longer built)
 *   localhom - table of LocalHom chains, updated in place
 */
void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
{
	int i, j;
	LocalHom *ptr;

	for( i=0; i<nseq; i++ )
	{
		for( j=0; j<nseq; j++ )
		{
			for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
			{
				ptr->importance = ptr->opt / ptr->overlapaa;
				ptr->fimportance = (double)ptr->importance;
			}
		}
	}
}
6768
/*
 * Variant of dontcalcimportance that only touches column 0 of the table:
 * for rows 0 .. nseq-2 every record in the chain starting at localhom[row][0]
 * gets importance = opt * 0.5 (an ad hoc factor, per the original author) and
 * fimportance set to the same value.
 *
 *   nseq     - number of sequences; the last row is not visited
 *   eff, seq - unused
 *   localhom - table of LocalHom chains, updated in place
 */
void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom )
{
	int row;
	int lastrow = nseq - 1;
	LocalHom *node;

	for( row=0; row<lastrow; row++ )
	{
		node = localhom[row];
		while( node )
		{
			node->importance = node->opt * 0.5; /* ad hoc */
			node->fimportance = (double)node->importance;
			node = node->next;
		}
	}
}
6808
/*
 * Computes a position-weighted importance for every local-homology record.
 *
 *   nseq     - number of sequences; localhom is an nseq x nseq table
 *   eff      - per-sequence weights
 *   seq      - sequences; a sequence for which seqlen() returns 0 gets weight 0
 *   localhom - table of LocalHom chains, updated in place
 *
 * Steps:
 *   1. The weights are normalised so that they sum to 1.
 *   2. For each sequence a, a per-position coverage is built: every record of
 *      every chain localhom[a][b] (b != a, opt != -1) adds the weight of b to
 *      positions start1..end1.
 *   3. Each such record gets importance = (mean coverage over start1..end1)
 *      * opt; pairs whose head record has opt == -1 are skipped entirely.
 *   4. The chains localhom[a][b] and localhom[b][a] are walked in lockstep and
 *      both records of each step receive the average of their two importances
 *      (steps where either opt is -1 are left alone).
 * fimportance always mirrors importance.
 */
void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
{
	int a, b, col, ncol;
	double *cover;
	double *weight;
	double wsum, mean, avg;
	int *reslen;
	LocalHom *hit, *fwd, *rev;

	cover = AllocateDoubleVec( nlenmax );
	reslen = AllocateIntVec( nseq );
	weight = AllocateDoubleVec( nseq );

	/* Step 1: normalised weights. */
	wsum = 0.0;
	for( a=0; a<nseq; a++ )
	{
		reslen[a] = seqlen( seq[a] );
		weight[a] = ( reslen[a] == 0 ) ? 0.0 : eff[a];
		wsum += weight[a];
	}
	for( a=0; a<nseq; a++ ) weight[a] /= wsum;

	for( a=0; a<nseq; a++ )
	{
		/* Step 2: coverage of sequence a. */
		for( col=0; col<nlenmax; col++ ) cover[col] = 0.0;

		for( b=0; b<nseq; b++ )
		{
			if( b == a ) continue;
			hit = localhom[a]+b;
			while( hit )
			{
				if( hit->opt != -1 )
				{
					for( col=hit->start1; col<=hit->end1; col++ )
						cover[col] += weight[b];
				}
				hit = hit->next;
			}
		}

		/* Step 3: importance of every record of sequence a. */
		for( b=0; b<nseq; b++ )
		{
			if( b == a || localhom[a][b].opt == -1.0 ) continue;

			hit = localhom[a]+b;
			while( hit )
			{
				if( hit->opt != -1.0 )
				{
					mean = 0.0;
					ncol = 0;
					for( col=hit->start1; col<=hit->end1; col++ )
					{
						mean += cover[col];
						ncol++;
					}
					mean /= (double)ncol;

					hit->importance = mean * hit->opt;
					hit->fimportance = (double)hit->importance;
				}
				hit = hit->next;
			}
		}
	}

	/* Step 4: symmetrise the two directions of every pair. */
	for( a=0; a<nseq-1; a++ )
	{
		for( b=a+1; b<nseq; b++ )
		{
			fwd = localhom[a]+b;
			rev = localhom[b]+a;
			while( fwd && rev )
			{
				if( fwd->opt != -1.0 && rev->opt != -1.0 )
				{
					avg = 0.5 * ( fwd->importance + rev->importance );
					fwd->importance = rev->importance = avg;
					fwd->fimportance = rev->fimportance = (double)avg;
				}
				fwd = fwd->next;
				rev = rev->next;
			}
		}
	}

	free( cover );
	free( reslen );
	free( weight );
}
6989
6990
6991
6992 #if 0
/*
 * NOTE: this function lies inside an `#if 0` region and is not compiled.
 *
 * Re-weights the importance of every local-homology record with a pairwise
 * weight matrix.
 *
 *   nseq     - number of sequences
 *   eff      - nseq x nseq weight matrix (printed to the error stream first)
 *   localhom - nseq x nseq table of LocalHom chains, updated in place
 *
 * For each sequence i, eff[i][j] / nseq * importance of every record of every
 * chain localhom[i][j] (j != i) is accumulated over positions start1..end1;
 * each record's importance is then replaced by the mean accumulated value over
 * its own positions.  A final pass walks localhom[i][j] and localhom[j][i] in
 * lockstep to combine the two directions.
 * The accumulation buffer is static and allocated once with nlenmax entries.
 */
void weightimportance( int nseq, double **eff, LocalHom **localhom )
{
	int i, j, pos, len;
	static double *importance;
	double tmpdouble;
	LocalHom *tmpptr, *tmpptr1, *tmpptr2;
	if( importance == NULL )
		importance = AllocateDoubleVec( nlenmax );


	reporterr( "effmtx = :\n" );
	for( i=0; i<nseq; i++ )
	{
		for( j=0; j<nseq; j++ )
		{
			reporterr( "%6.3f ", eff[i][j] );
		}
		reporterr( "\n" );
	}
	for( i=0; i<nseq; i++ )
	{
		/* Reset the per-position accumulator for sequence i. */
		for( pos=0; pos<nlenmax; pos++ )
			importance[pos] = 0.0;
		for( j=0; j<nseq; j++ )
		{

			if( i == j ) continue;
			tmpptr = localhom[i]+j;
			while( 1 )
			{
				reporterr( "i=%d, j=%d\n", i, j );
				for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
					// importance[pos] += eff[i][j] * tmpptr->importance;
					importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0;
				reporterr( "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance );
				tmpptr = tmpptr->next;
				if( tmpptr == NULL ) break;
			}

		}
#if 0
		reporterr( "position specific importance of seq %d:\n", i );
		for( pos=0; pos<nlenmax; pos++ )
			reporterr( "%d: %f\n", pos, importance[pos] );
		reporterr( "\n" );
#endif
		for( j=0; j<nseq; j++ )
		{
			reporterr( "i=%d, j=%d\n", i, j );
			if( i == j ) continue;
			tmpptr = localhom[i]+j;
			do
			{
				/* Mean of the accumulator over this record's own positions. */
				tmpdouble = 0.0;
				len = 0;
				for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
				{
					tmpdouble += importance[pos];
					len++;
				}
				tmpdouble /= (double)len;
				tmpptr->importance = tmpdouble;
				reporterr( "importance of match between %d - %d = %f\n", i, j, tmpdouble );
				tmpptr = tmpptr->next;
			} while( tmpptr );
		}
	}
#if 1
	for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
	{
		reporterr( "i = %d, j=%d\n", i, j );
		tmpptr1 = localhom[i]+j;
		tmpptr2 = localhom[j]+i;
		while( tmpptr1 && tmpptr2 )
		{
			/* tmpptr1 receives the average of the two directions. */
			tmpptr1->importance += tmpptr2->importance;
			tmpptr1->importance *= 0.5;
			/* NOTE(review): this multiplies rather than assigns, so the two
			   directions end up different - possibly `=` was intended; confirm. */
			tmpptr2->importance *= tmpptr1->importance;
			reporterr( "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance );
			tmpptr1 = tmpptr1->next;
			tmpptr2 = tmpptr2->next;
			reporterr( "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 );
		}
	}
#endif
}
7079
/*
 * NOTE: this function lies inside an `#if 0` region and is not compiled.
 *
 * Computes the `wimportance` field of the head record of every pair
 * localhom[i][j] from per-sequence weights.
 *
 *   nseq     - number of sequences
 *   eff      - per-sequence weights; the pair weight used is eff[i] * eff[j]
 *   localhom - nseq x nseq table; only the head record of each chain is used
 *
 * For each sequence i, eff[i]*eff[j]/nseq * importance of localhom[i][j] is
 * accumulated over positions start1..end1; wimportance of localhom[i][j] is the
 * mean accumulated value over its own positions.  Finally both directions of
 * each pair are set to the average of the two.
 * The accumulation buffer is static and allocated once with nlenmax entries.
 */
void weightimportance2( int nseq, double *eff, LocalHom **localhom )
{
	int i, j, pos, len;
	static double *wimportance;
	double tmpdouble;
	if( wimportance == NULL )
		wimportance = AllocateDoubleVec( nlenmax );


	reporterr( "effmtx = :\n" );
	for( i=0; i<nseq; i++ )
	{
		for( j=0; j<nseq; j++ )
		{
			reporterr( "%6.3f ", eff[i] * eff[j] );
		}
		reporterr( "\n" );
	}
	for( i=0; i<nseq; i++ )
	{
		reporterr( "i = %d\n", i );
		/* Reset the per-position accumulator for sequence i. */
		for( pos=0; pos<nlenmax; pos++ )
			wimportance[pos] = 0.0;
		for( j=0; j<nseq; j++ )
		{
			if( i == j ) continue;
			for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
				// wimportance[pos] += eff[i][j];
				wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0;
		}
#if 0
		reporterr( "position specific wimportance of seq %d:\n", i );
		for( pos=0; pos<nlenmax; pos++ )
			reporterr( "%d: %f\n", pos, wimportance[pos] );
		reporterr( "\n" );
#endif
		for( j=0; j<nseq; j++ )
		{
			if( i == j ) continue;
			/* Mean of the accumulator over the positions of this pair's head record. */
			tmpdouble = 0.0;
			len = 0;
			for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
			{
				tmpdouble += wimportance[pos];
				len++;
			}
			tmpdouble /= (double)len;
			localhom[i][j].wimportance = tmpdouble;
			reporterr( "wimportance of match between %d - %d = %f\n", i, j, tmpdouble );
		}
	}
#if 1
	/* Average the two directions into [i][j], then copy the result to [j][i]. */
	for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
	{
		localhom[i][j].wimportance += localhom[j][i].wimportance;
		localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance );
	}
	for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
	{
		localhom[j][i].wimportance = localhom[i][j].wimportance;
	}
#endif
}
7143
/*
 * NOTE: this function lies inside an `#if 0` region and is not compiled.
 *
 * Sets wimportance = importance * eff1[i] * eff2[j] on every record of every
 * chain localhom[i][j], for i in 0..clus1-1 and j in 0..clus2-1.
 *
 *   clus1, clus2 - sizes of the two groups
 *   eff1, eff2   - per-sequence weights of the two groups
 *   localhom     - clus1 x clus2 table of pointers to LocalHom chains
 *
 * The static buffer `wimportance` is allocated on first use but never read or
 * written here; `pos`, `len`, `tmpptr1` and `tmpptr2` are likewise unused.
 * Each chain is walked with do/while, so localhom[i][j] must not be NULL.
 */
void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom )
{
	int i, j, pos, len;
	static double *wimportance;
	LocalHom *tmpptr, *tmpptr1, *tmpptr2;
	if( wimportance == NULL )
		wimportance = AllocateDoubleVec( nlenmax );


#if 0
	reporterr( "effarr1 = :\n" );
	for( i=0; i<clus1; i++ )
		reporterr( "%6.3f\n", eff1[i] );
	reporterr( "effarr2 = :\n" );
	for( i=0; i<clus2; i++ )
		reporterr( "%6.3f\n", eff2[i] );
#endif

	for( i=0; i<clus1; i++ )
	{
		for( j=0; j<clus2; j++ )
		{
			// reporterr( "i=%d, j=%d\n", i, j );
			tmpptr = localhom[i][j];
			do
			{
				tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j];
				tmpptr = tmpptr->next;
			} while( tmpptr );
		}
	}
}
7176
/*
 * NOTE: this function lies inside an `#if 0` region and is not compiled.
 *
 * Appends a new, zero-initialised LocalHom record to the end of the chain
 * that starts at `localhom` and fills in its coordinates and score.
 *
 *   localhom       - head of the chain (must not be NULL)
 *   start1, end1   - range stored in start1/end1 of the new record
 *   start2, end2   - range stored in start2/end2 of the new record
 *   opt            - score stored in the new record
 *
 * The chain is walked to its tail on every call.  The calloc result is used
 * without a NULL check.  Progress messages are written with reporterr.
 */
static void addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt )
{
	LocalHom *tmpptr;
	tmpptr = localhom;

	reporterr( "adding localhom\n" );
	/* Find the current tail. */
	while( tmpptr->next )
		tmpptr = tmpptr->next;
	reporterr( "allocating localhom\n" );
	tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
	reporterr( "done\n" );
	tmpptr = tmpptr->next;

	tmpptr->start1 = start1;
	tmpptr->start2 = start2;
	tmpptr->end1 = end1;
	tmpptr->end2 = end2;
	tmpptr->opt = opt;

	reporterr( "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
}
7198
7199 #if 0
7200 #endif
7201
7202
7203
/*
 * NOTE: this function lies inside an `#if 0` region and is not compiled.
 *
 * Derives additional local-homology records for pairs (i,j) through a third
 * sequence k.
 *
 *   nseq     - number of sequences
 *   localhom - nseq x nseq table of LocalHom chains; new records are appended
 *              to localhom[i][j] and localhom[j][i] via addlocalhom_e
 *
 * For every k and every pair i<j (both != k), two maps indexed by position
 * start1..end1 of the records of localhom[k][i] and localhom[k][j] are built:
 * tmpint1/tmpint2 hold the matching position (counted up from start2) and
 * tmpdouble1/tmpdouble2 hold the record's opt.  Wherever both maps are set,
 * maximal runs in which both mapped positions advance by exactly one are
 * appended as new records, with opt = MIN of the two opt values at the run's
 * first position.
 *
 * NOTE(review): tmpdouble1/tmpdouble2 are declared `int *` and allocated with
 * AllocateIntVec, so storing tmpptr->opt in them drops the fractional part -
 * confirm whether double buffers were intended.
 * NOTE(review): the work buffers are static and sized by nlenmax at the first
 * call only.
 */
void extendlocalhom( int nseq, LocalHom **localhom )
{
	int i, j, k, pos0, pos1, pos2, st;
	int start1, start2, end1, end2;
	static int *tmpint1 = NULL;
	static int *tmpint2 = NULL;
	static int *tmpdouble1 = NULL;
	static int *tmpdouble2 = NULL;
	double opt;
	LocalHom *tmpptr;
	if( tmpint1 == NULL )
	{
		tmpint1 = AllocateIntVec( nlenmax );
		tmpint2 = AllocateIntVec( nlenmax );
		tmpdouble1 = AllocateIntVec( nlenmax );
		tmpdouble2 = AllocateIntVec( nlenmax );
	}


	for( k=0; k<nseq; k++ )
	{
		for( i=0; i<nseq-1; i++ )
		{
			if( i == k ) continue;
			/* Map: position in the k-i records -> matching position, -1 = unset. */
			for( pos0=0; pos0<nlenmax; pos0++ )
				tmpint1[pos0] = -1;

			tmpptr=localhom[k]+i;
			do
			{
				pos0 = tmpptr->start1;
				pos1 = tmpptr->start2;
				while( pos0<=tmpptr->end1 )
				{
					tmpint1[pos0] = pos1++;
					tmpdouble1[pos0] = tmpptr->opt;
					pos0++;
				}
			} while( tmpptr = tmpptr->next );


			for( j=i+1; j<nseq; j++ )
			{
				if( j == k ) continue;
				/* Same map for the k-j records. */
				for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1;
				tmpptr=localhom[k]+j;
				do
				{
					pos0 = tmpptr->start1;
					pos2 = tmpptr->start2;
					while( pos0<=tmpptr->end1 )
					{
						tmpint2[pos0] = pos2++;
						tmpdouble2[pos0++] = tmpptr->opt;
					}
				} while( tmpptr = tmpptr->next );

#if 0

				reporterr( "i,j=%d,%d\n", i, j );

				for( pos0=0; pos0<nlenmax; pos0++ )
					reporterr( "%d ", tmpint1[pos0] );
				reporterr( "\n" );

				for( pos0=0; pos0<nlenmax; pos0++ )
					reporterr( "%d ", tmpint2[pos0] );
				reporterr( "\n" );
#endif


				/* st == 1 while a run covered by both maps is open. */
				st = 0;
				for( pos0=0; pos0<nlenmax; pos0++ )
				{
					// reporterr( "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] );
					if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 )
					{
						if( st == 0 )
						{
							st = 1;
							start1 = tmpint1[pos0];
							start2 = tmpint2[pos0];
							opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
						}
						else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 )
						{
							/* Discontinuity: emit the run so far and start a new one here. */
							addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
							addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
							start1 = tmpint1[pos0];
							start2 = tmpint2[pos0];
							opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
						}
					}
					if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 )
					{
						if( st == 1 )
						{
							/* The run ended at the previous position. */
							st = 0;
							addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
							addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
						}
					}
				}
			}
		}
	}
}
7311 #endif
7312
/*
 * Stores one local-homology record in the chain headed by `lh`.
 *
 *   pt      - record after which to add; callers in this file pass lh->last
 *   lh      - head record of the chain; its `last` and `nokori` are updated
 *   sti/eni - stored in start1/end1
 *   stj/enj - stored in start2/end2
 *   opt     - stored in opt
 *   overlp  - stored in overlapaa
 *   interm  - stored in extended
 *
 * When pt is the head itself, the head record is overwritten in place;
 * otherwise a new zero-initialised record is allocated and linked after pt.
 * In both cases lh->last ends up pointing at the record that was written and
 * lh->nokori is incremented.  (The original author left a note here saying
 * that something is probably wrong somewhere in this logic.)
 *
 * Terminates the program with a message on the error stream if the
 * allocation fails.
 */
static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm )
{
	if( pt != lh ) // advance: append a fresh record after pt
	{
		pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
		if( pt->next == NULL )
		{
			/* Previously the NULL result was dereferenced right away. */
			fprintf( stderr, "Cannot allocate LocalHom in addlocalhom2_e\n" );
			exit( 1 );
		}
		pt = pt->next;
		pt->next = NULL;
		lh->last = pt;
	}
	else // reuse the head record as it is
	{
		lh->last = pt;
	}
	lh->nokori++;

	pt->start1 = sti;
	pt->start2 = stj;
	pt->end1 = eni;
	pt->end2 = enj;
	pt->opt = opt;
	pt->extended = interm;
	pt->overlapaa = overlp;
#if 0
	reporterr( "i: %d-%d\n", sti, eni );
	reporterr( "j: %d-%d\n", stj, enj );
	reporterr( "opt=%f\n", opt );
	reporterr( "overlp=%d\n", overlp );
#endif
}
7344
/*
 * Extends the local-homology table with records for pairs (i,j) that are
 * inferred through a third sequence k.
 *
 *   nseq     - number of sequences
 *   localhom - nseq x nseq table of LocalHom chains; new records are added to
 *              localhom[i][j] and localhom[j][i] through addlocalhom2_e
 *   dist     - distance matrix, read as dist[MIN(a,b)][MAX(a,b)]
 *
 * For every pair i<j and every k other than i and j whose distances to i and
 * to j do not exceed dist[i][j] * thrinter:
 *   - ini[] and inj[] are reset to -1 and then filled from the chains
 *     localhom[i][k] and localhom[j][k]: index = start2 counted upwards, value
 *     = start1 counted upwards, for end1-start1+1 positions per record
 *     (presumably start1 is a position in i or j and start2 the matching
 *     position in k - confirm against the code that fills the table).
 *     Filling stops at the first record whose `extended` is > -1.
 *   - overlp counts the indices where both maps are set.
 *   - Each maximal run of indices where both maps are set and both values
 *     advance by exactly one is added as a record with
 *     opt = MIN( localhom[i][k].opt, localhom[j][k].opt ), overlapaa = overlp
 *     and extended = k.
 *
 * NOTE(review): ini/inj are static and sized nlenmax+1 on the first call only;
 * writes through `ipt` are not bounds-checked against that size.
 * NOTE(review): a run that is still open when pk reaches plim is not emitted.
 */
void extendlocalhom2( int nseq, LocalHom **localhom, double **dist )
{
	int overlp, plim;
	int i, j, k;
	int pi, pj, pk, len;
	int status, sti, stj;
	int *ipt;
	int co;
	static int *ini = NULL;
	static int *inj = NULL;
	LocalHom *pt;

	sti = 0; // by D.Mathog, a guess
	stj = 0; // by D.Mathog, a guess

	if( ini == NULL )
	{
		ini = AllocateIntVec( nlenmax+1 );
		inj = AllocateIntVec( nlenmax+1 );
	}


	for( i=0; i<nseq-1; i++ )
	{
		for( j=i+1; j<nseq; j++ )
		{
			/* The disabled branch picked intermediates at random; the active
			   branch tries every k.  Both open the same brace level. */
#if 0
			for( k=0; k<nseq; k++ ) sai[k] = 0;
			numint = ncons;
			while( 1 )
			{
				k = (int)( rnd() * nseq );
				if( k == i || k == j ) continue; // it would be better to also skip those already done
				if( numint-- == 0 ) break;
				if( sai[k] ) continue;
				sai[k] = 1;
#else
			for( k=0; k<nseq; k++ )
			{
#endif
				// reporterr( "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter );
				if( k == i || k == j ) continue; // it would be better to also skip those already done
				/* Only use k if it is not much farther from i and j than they are from each other. */
				if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue;
				/* Reset both maps to "unset". */
				ipt = ini; co = nlenmax+1;
				while( co-- ) *ipt++ = -1;
				ipt = inj; co = nlenmax+1;
				while( co-- ) *ipt++ = -1;
				overlp = 0;

				{
					/* Fill ini from the i-k chain. */
					for( pt=localhom[i]+k; pt; pt=pt->next )
					{
						// reporterr( "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended );
						if( pt->opt == -1 )
						{
							reporterr( "opt kainaide tbfast.c = %f\n", pt->opt );
						}
						if( pt->extended > -1 ) break;
						pi = pt->start1;
						pk = pt->start2;
						len = pt->end1 - pt->start1 + 1;
						ipt = ini + pk;
						while( len-- ) *ipt++ = pi++;
					}
				}

				{
					/* Fill inj from the j-k chain. */
					for( pt=localhom[j]+k; pt; pt=pt->next )
					{
						if( pt->opt == -1 )
						{
							reporterr( "opt kainaide tbfast.c = %f\n", pt->opt );
						}
						if( pt->extended > -1 ) break;
						pj = pt->start1;
						pk = pt->start2;
						len = pt->end1 - pt->start1 + 1;
						ipt = inj + pk;
						while( len-- ) *ipt++ = pj++;
					}
				}
#if 0
				reporterr( "i=%d,j=%d,k=%d\n", i, j, k );
				overlp = 0;
				for( pk = 0; pk < nlenmax; pk++ )
				{
					if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
					reporterr( " %d", inj[pk] );
				}
				reporterr( "\n" );

				reporterr( "i=%d,j=%d,k=%d\n", i, j, k );
				overlp = 0;
				for( pk = 0; pk < nlenmax; pk++ )
				{
					if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
					reporterr( " %d", ini[pk] );
				}
				reporterr( "\n" );
#endif
				/* Number of indices covered by both maps. */
				overlp = 0;
				plim = nlenmax+1;
				for( pk = 0; pk < plim; pk++ )
					if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;


				/* status == 1 while a run covered by both maps is open. */
				status = 0;
				plim = nlenmax+1;
				for( pk=0; pk<plim; pk++ )
				{
					// reporterr( "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] );
					if( status )
					{
						if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // finish the open run
						{
							status = 0;
							// reporterr( "end here!\n" );

							pt = localhom[i][j].last;
							// reporterr( "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k );
							addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
							// reporterr( "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );

							pt = localhom[j][i].last;
							// reporterr( "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next );
							// reporterr( "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k );
							addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
							// reporterr( "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
						}
					}
					if( !status ) // deliberately NOT an else: a run closed above may restart at this same pk
					{
						if( ini[pk] == -1 || inj[pk] == -1 ) continue;
						sti = ini[pk];
						stj = inj[pk];
						// reporterr( "start here!\n" );
						status = 1;
					}
				}
				// if( status ) reporterr( "end here\n" );

				// exit( 1 );
				// fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
			}
#if 0
			for( pt=localhomtable[i]+j; pt; pt=pt->next )
			{
				if( tmpptr->opt == -1.0 ) continue;
				fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
			}
#endif
		}
	}
}
7499
7500 int makelocal( char *s1, char *s2, int thr )
7501 {
7502 int start, maxstart, maxend;
7503 char *pt1, *pt2;
7504 double score;
7505 double maxscore;
7506
7507 pt1 = s1;
7508 pt2 = s2;
7509
7510 maxend = 0; // by D.Mathog, a guess
7511
7512 // reporterr( "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
7513 maxscore = 0.0;
7514 score = 0.0;
7515 start = 0;
7516 maxstart = 0;
7517 while( *pt1 )
7518 {
7519 // reporterr( "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
7520 if( *pt1 == '-' || *pt2 == '-' )
7521 {
7522 // reporterr( "penalty = %d\n", penalty );
7523 score += penalty;
7524 while( *pt1 == '-' || *pt2 == '-' )
7525 {
7526 pt1++; pt2++;
7527 }
7528 continue;
7529 }
7530
7531 score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr );
7532 // score += ( amino_dis[(int)*pt1++][(int)*pt2++] );
7533 if( score > maxscore )
7534 {
7535 // reporterr( "score = %f\n", score );
7536 maxscore = score;
7537 maxstart = start;
7538 // reporterr( "## max! maxstart = %d, start = %d\n", maxstart, start );
7539 }
7540 if( score < 0.0 )
7541 {
7542 // reporterr( "## resetting, start = %d, maxstart = %d\n", start, maxstart );
7543 if( start == maxstart )
7544 {
7545 maxend = pt1 - s1;
7546 // reporterr( "maxend = %d\n", maxend );
7547 }
7548 score = 0.0;
7549 start = pt1 - s1;
7550 }
7551 }
7552 if( start == maxstart )
7553 maxend = pt1 - s1 - 1;
7554
7555 // reporterr( "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
7556 s1[maxend+1] = 0;
7557 s2[maxend+1] = 0;
7558 return( maxstart );
7559 }
7560
7561 void resetlocalhom( int nseq, LocalHom **lh )
7562 {
7563 int i, j;
7564 LocalHom *pt;
7565
7566 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
7567 {
7568 for( pt=lh[i]+j; pt; pt=pt->next )
7569 pt->opt = 1.0;
7570 }
7571
7572 }
7573
7574 void gapireru( char *res, char *ori, char *gt )
7575 {
7576 char g;
7577 while( (g = *gt++) )
7578 {
7579 if( g == '-' )
7580 {
7581 *res++ = *newgapstr;
7582 }
7583 else
7584 {
7585 *res++ = *ori++;
7586 }
7587 }
7588 *res = 0;
7589 }
7590
/*
 * Copies column `pos` of the first n rows of s into g: g[r] = s[r][pos].
 * No terminating NUL is written.
 *
 *   g   - output buffer with room for n characters
 *   s   - array of at least n strings
 *   pos - column to read from every row
 *   n   - number of rows to copy
 */
void getkyokaigap( char *g, char **s, int pos, int n )
{
	for( ; n != 0; n-- )
	{
		*g = (*s)[pos];
		g++;
		s++;
	}
}
7599
/*
 * Weighted count of gap openings per column.
 *
 *   ogcp    - output, len entries; ogcp[c] becomes the sum of eff[r] over all
 *             rows r where column c is '-' and the preceding column is not
 *   clus    - number of rows in seq
 *   seq     - aligned rows, at least len characters each
 *   eff     - per-row weights
 *   len     - number of columns
 *   sgappat - per-row character standing in for the column before the first
 *             one ('-' means the row is already inside a gap)
 */
void new_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len, char *sgappat )
{
	int row, col;

	for( col=0; col<len; col++ ) ogcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		const char *line = seq[row];
		double weight = (double)eff[row];
		int ingap = ( sgappat[row] == '-' );
		int wasgap;

		for( col=0; col<len; col++ )
		{
			wasgap = ingap;
			ingap = ( line[col] == '-' );
			if( ingap && !wasgap ) ogcp[col] += weight;
		}
	}
}
/*
 * Weighted count of gap openings per column, including the boundary column
 * that follows the last one.
 *
 *   ogcp    - output; entries 0..len+1 are zeroed, entries 0..len are counted
 *   clus    - number of rows in seq
 *   seq     - aligned rows, at least len characters each
 *   eff     - per-row weights
 *   len     - number of columns
 *   sgappat - per-row character standing in for the column before the first
 *   egappat - per-row character standing in for the column after the last;
 *             a gap opening there is counted in ogcp[len]
 */
void new_OpeningGapCount_zure( double *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
	int row, col;

	for( col=0; col<len+2; col++ ) ogcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		const char *line = seq[row];
		double weight = (double)eff[row];
		int ingap = ( sgappat[row] == '-' );
		int wasgap;

		for( col=0; col<len; col++ )
		{
			wasgap = ingap;
			ingap = ( line[col] == '-' );
			if( ingap && !wasgap ) ogcp[col] += weight;
		}

		/* Boundary column after the end of the row. */
		wasgap = ingap;
		ingap = ( egappat[row] == '-' );
		if( ingap && !wasgap ) ogcp[len] += weight;
	}
}
7708
/*
 * Weighted count of gap closings per column, including the boundary column
 * that follows the last one.
 *
 *   fgcp    - output; entries 0..len+1 are zeroed.  fgcp[c] becomes the sum of
 *             eff[r] over all rows r where column c is not '-' but the
 *             preceding column is
 *   clus    - number of rows in seq
 *   seq     - aligned rows, at least len characters each
 *   eff     - per-row weights
 *   len     - number of columns
 *   sgappat - per-row character standing in for the column before the first
 *   egappat - per-row character standing in for the column after the last;
 *             a gap closing there is counted in fgcp[len]
 */
void new_FinalGapCount_zure( double *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
	int row, col;

	for( col=0; col<len+2; col++ ) fgcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		const char *line = seq[row];
		double weight = (double)eff[row];
		int ingap = ( sgappat[row] == '-' );
		int wasgap;

		for( col=0; col<len; col++ )
		{
			wasgap = ingap;
			ingap = ( line[col] == '-' );
			if( wasgap && !ingap ) fgcp[col] += weight;
		}

		/* Boundary column after the end of the row. */
		wasgap = ingap;
		ingap = ( egappat[row] == '-' );
		if( wasgap && !ingap ) fgcp[len] += weight;
	}
}
// Sums, per column, the weights of the sequences whose gap run ends at that
// column (the column holds a gap and the following character does not).
//
//   fgcp     output; entries 0..len-1 are cleared and may receive weight
//   clus     number of sequences in seq / entries in eff, egappat
//   seq      the sequences; characters 0..len of each are read
//   eff      weight of each sequence
//   len      number of columns scanned
//   egappat  egappat[j] is compared against character len of sequence j
//
// NOTE(review): character seq[j][len] is read as the successor of the last
// column, and fgcp[len] (which is NOT cleared here) is incremented when
// seq[j][len] is a gap and egappat[j] is not.  The variant that used to sit
// under "#if 0" scanned one column fewer and applied egappat[j] at index
// len-1 instead -- confirm which behaviour the callers rely on.
void new_FinalGapCount( double *fgcp, int clus, char **seq, double *eff, int len, char *egappat )
{
	int col, s;
	int prevgap, curgap;
	double w;
	char *row;

	for( col=0; col<len; col++ ) fgcp[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = (double)eff[s];
		row = seq[s];
		curgap = ( row[0] == '-' );

		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( row[col+1] == '-' );
			if( prevgap && !curgap ) fgcp[col] += w;
		}

		prevgap = curgap;
		curgap = ( egappat[s] == '-' );
		if( prevgap && !curgap ) fgcp[len] += w;
	}
}
7837
// Sums, per column, the weights of the sequences in which a gap run opens.
// Every sequence is treated as being preceded by a non-gap.
//
//   ogcp   output; entries 0..len-1 are accumulated, entry len is set to 0
//   clus   number of sequences in seq / entries in eff
//   seq    the sequences; the first len characters of each are scanned
//   eff    weight of each sequence
//   len    number of columns scanned
void st_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len )
{
	int col, s;
	int prevgap, curgap;
	double w;
	char *row;

	for( col=0; col<len; col++ ) ogcp[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = (double)eff[s];
		row = seq[s];
		curgap = 0;

		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( row[col] == '-' );
			if( curgap && !prevgap ) ogcp[col] += w;
		}
	}
	ogcp[len] = 0.0;
}
7868
// Sums the weights of the sequences whose gap run ends at each column, and
// stores the sum one slot to the right: a run whose last gap is column c
// contributes to fgcp[c+1].
//
//   fgcp   output; entries 0..len are cleared, entries 1..len may receive weight
//   clus   number of sequences in seq / entries in eff
//   seq    the sequences; characters 0..len of each are read
//   eff    weight of each sequence
//   len    number of columns scanned
//
// NOTE(review): if seq[j][len] is itself a gap, fgcp[len+1] is incremented
// although that entry is not cleared here.
void st_FinalGapCount_zure( double *fgcp, int clus, char **seq, double *eff, int len )
{
	int col, s;
	int prevgap, curgap;
	double w;
	char *row;

	for( col=0; col<len+1; col++ ) fgcp[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = (double)eff[s];
		row = seq[s];
		curgap = ( row[0] == '-' );

		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( row[col+1] == '-' );
			if( prevgap && !curgap ) fgcp[col+1] += w;
		}

		// whatever follows the scanned range is taken to be a non-gap
		if( curgap ) fgcp[len+1] += w;
	}
}
7906
// Sums, per column, the weights of the sequences whose gap run ends at that
// column (the column holds a gap and the following character does not).
//
//   fgcp   output; entries 0..len-1 are cleared and may receive weight
//   clus   number of sequences in seq / entries in eff
//   seq    the sequences; characters 0..len of each are read
//   eff    weight of each sequence
//   len    number of columns scanned
//
// NOTE(review): if seq[j][len] is itself a gap, fgcp[len] is incremented
// although that entry is not cleared here.
void st_FinalGapCount( double *fgcp, int clus, char **seq, double *eff, int len )
{
	int col, s;
	int prevgap, curgap;
	double w;
	char *row;

	for( col=0; col<len; col++ ) fgcp[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = (double)eff[s];
		row = seq[s];
		curgap = ( row[0] == '-' );

		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( row[col+1] == '-' );
			if( prevgap && !curgap ) fgcp[col] += w;
		}

		// whatever follows the scanned range is taken to be a non-gap
		if( curgap ) fgcp[len] += w;
	}
}
7944
// Sums, per column, the weights of the sequences whose gap run ends at that
// column, then prints entries 0..len-1 through reporterr.
//
//   fgcp   output; entries 0..len are cleared and accumulated
//   clus   number of sequences in seq / entries in eff
//   seq    NUL-terminated sequences; characters 0..len of each are examined
//   eff    weight of each sequence
//   len    number of columns
//   xxx    unused
//
// The character after position c is looked at only when position c holds a
// gap.  The previous code fetched seq[j][c+1] unconditionally, which on the
// last iteration read seq[j][len+1], one byte beyond the terminator of a
// len-character string.  The accumulated values are unchanged.
void getGapPattern( double *fgcp, int clus, char **seq, double *eff, int len, char *xxx )
{
	int col, s;
	double w;
	char *row;

	(void)xxx;

	for( col=0; col<len+1; col++ ) fgcp[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = (double)eff[s];
		row = seq[s];
		for( col=0; col<len+1; col++ )
		{
			// row[col] == '-' guarantees row[col] is not the terminator,
			// so row[col+1] is still inside the string.
			if( row[col] == '-' && row[col+1] != '-' ) fgcp[col] += w;
		}
	}

	for( col=0; col<len; col++ )
	{
		reporterr( "%d, %f\n", col, fgcp[col] );
	}
}
7986
// For every column j in 1..len-1, sums the weights of the sequences that
// have a gap at both j-1 and j.
//
//   freq   output; entries 0..len are cleared, entries 1..len-1 may receive weight
//   clus   number of sequences in seq / entries in eff
//   seq    the sequences; the first len characters of each are scanned
//   eff    weight of each sequence
//   len    number of columns
//
// freq[0] and freq[len] always stay 0: the contributions of the first and
// last column are switched off in this variant.
void getdigapfreq_st( double *freq, int clus, char **seq, double *eff, int len )
{
	int col, s;
	double w;
	char *row;

	for( col=0; col<len+1; col++ ) freq[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = eff[s];
		row = seq[s];
		for( col=1; col<len; col++ )
		{
			if( row[col] == '-' && row[col-1] == '-' )
				freq[col] += w;
		}
	}
}
8008
// For every column j in 1..len-1, sums the weights of the sequences that
// have a non-gap at both j-1 and j.
//
//   freq   output; entries 0..len+1 are cleared, entries 0..len may receive weight
//   clus   number of sequences in seq / entries in eff
//   seq    the sequences; the first len characters of each are scanned
//   eff    weight of each sequence
//   len    number of columns
//
// freq[0] counts sequences whose first column is a non-gap; freq[len] counts
// sequences whose last column (len-1) is a non-gap.
void getdiaminofreq_x( double *freq, int clus, char **seq, double *eff, int len )
{
	int col, s;
	double w;
	char *row;

	for( col=0; col<len+2; col++ ) freq[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = eff[s];
		row = seq[s];

		if( row[0] != '-' )
			freq[0] += w;

		for( col=1; col<len; col++ )
		{
			if( row[col] != '-' && row[col-1] != '-' )
				freq[col] += w;
		}

		if( row[len-1] != '-' )
			freq[len] += w;
	}
}
8030
// For every column j in 1..len-1, sums the weights of the sequences that
// have a non-gap at both j-1 and j.
//
//   freq   output; entries 0..len are cleared and accumulated
//   clus   number of sequences in seq / entries in eff
//   seq    the sequences; the first len characters of each are scanned
//   eff    weight of each sequence
//   len    number of columns
//
// freq[0] counts sequences whose first column is a non-gap.  freq[len]
// receives the weight of every sequence unconditionally.
void getdiaminofreq_st( double *freq, int clus, char **seq, double *eff, int len )
{
	int col, s;
	double w;
	char *row;

	for( col=0; col<len+1; col++ ) freq[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = eff[s];
		row = seq[s];

		if( row[0] != '-' )
			freq[0] += w;

		for( col=1; col<len; col++ )
		{
			if( row[col] != '-' && row[col-1] != '-' )
				freq[col] += w;
		}

		freq[len] += w;
	}
}
8052
// For every column j in 1..len-1, sums the weights of the sequences that
// have a gap at both j-1 and j.  The two ends use the flanking characters:
//   freq[0]   counts sequences with a gap at column 0 and sgappat[i] == '-'
//   freq[len] counts sequences with egappat[i] == '-' and a gap at column len-1
//
//   freq     output; entries 0..len+1 are cleared, entries 0..len may receive weight
//   clus     number of sequences in seq / entries in eff, sgappat, egappat
//   seq      the sequences; the first len characters of each are scanned
//   eff      weight of each sequence
//   len      number of columns
void getdigapfreq_part( double *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
	int col, s;
	double w;
	char *row;

	for( col=0; col<len+2; col++ ) freq[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = eff[s];
		row = seq[s];

		if( row[0] == '-' && sgappat[s] == '-' )
			freq[0] += w;

		for( col=1; col<len; col++ )
		{
			if( row[col] == '-' && row[col-1] == '-' )
				freq[col] += w;
		}

		if( egappat[s] == '-' && row[len-1] == '-' )
			freq[len] += w;
	}
}
8076
// For every column j in 1..len-1, sums the weights of the sequences that
// have a non-gap at both j-1 and j.  The two ends use the flanking characters:
//   freq[0]   counts sequences with a non-gap at column 0 and sgappat[i] != '-'
//   freq[len] counts sequences with egappat[i] != '-' and a non-gap at column len-1
//
//   freq     output; entries 0..len+1 are cleared, entries 0..len may receive weight
//   clus     number of sequences in seq / entries in eff, sgappat, egappat
//   seq      the sequences; the first len characters of each are scanned
//   eff      weight of each sequence
//   len      number of columns
void getdiaminofreq_part( double *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
	int col, s;
	double w;
	char *row;

	for( col=0; col<len+2; col++ ) freq[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = eff[s];
		row = seq[s];

		if( row[0] != '-' && sgappat[s] != '-' )
			freq[0] += w;

		for( col=1; col<len; col++ )
		{
			if( row[col] != '-' && row[col-1] != '-' )
				freq[col] += w;
		}

		if( egappat[s] != '-' && row[len-1] != '-' )
			freq[len] += w;
	}
}
8099
// Per-column gap weight, stored one slot to the right: a gap at column j of
// a sequence adds that sequence's weight to freq[j+1].  freq[0] counts the
// sequences whose leading flank character sgap[i] is a gap.
//
//   freq   output; entries 0..len+1 are cleared, entries 0..len may receive weight
//   clus   number of sequences in seq / entries in eff, sgap
//   seq    the sequences; the first len characters of each are scanned
//   eff    weight of each sequence
//   len    number of columns
//   sgap   one flank character per sequence
void getgapfreq_zure_part( double *freq, int clus, char **seq, double *eff, int len, char *sgap )
{
	int col, s;
	double w;
	char *row;

	for( col=0; col<len+2; col++ ) freq[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = eff[s];
		row = seq[s];

		if( sgap[s] == '-' )
			freq[0] += w;

		for( col=0; col<len; col++ )
		{
			if( row[col] == '-' )
				freq[col+1] += w;
		}
	}
}
8121
// Per-column gap weight, stored one slot to the right: a gap at column j of
// a sequence adds that sequence's weight to freq[j+1].
//
//   freq   output; entries 0..len are cleared and accumulated, and
//          freq[len+1] is set to 0 at the end
//   clus   number of sequences in seq / entries in eff
//   seq    the sequences; the first len characters of each are scanned
//   eff    weight of each sequence
//   len    number of columns
void getgapfreq_zure( double *freq, int clus, char **seq, double *eff, int len )
{
	int col, s;
	double w;
	char *row;

	for( col=0; col<len+1; col++ ) freq[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = eff[s];
		row = seq[s];
		for( col=0; col<len; col++ )
		{
			if( row[col] == '-' )
				freq[col+1] += w;
		}
	}
	freq[len+1] = 0.0;
}
8140
// Per-column gap weight: a gap at column j of a sequence adds that
// sequence's weight to freq[j].
//
//   freq   output; entries 0..len are cleared, entries 0..len-1 are
//          accumulated, and freq[len] is set to 0 again at the end
//   clus   number of sequences in seq / entries in eff
//   seq    the sequences; the first len characters of each are scanned
//   eff    weight of each sequence
//   len    number of columns
void getgapfreq( double *freq, int clus, char **seq, double *eff, int len )
{
	int col, s;
	double w;
	char *row;

	for( col=0; col<len+1; col++ ) freq[col] = 0.0;

	for( s=0; s<clus; s++ )
	{
		w = eff[s];
		row = seq[s];
		for( col=0; col<len; col++ )
		{
			if( row[col] == '-' )
				freq[col] += w;
		}
	}
	freq[len] = 0.0;
}
8159
8160 void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len )
8161 {
8162 int i, j, k, gb, gc;
8163 int known;
8164 double feff;
8165 Gappat **fpt;
8166 char *spt;
8167 int gaplen;
8168
8169 fpt = pat;
8170 i = len+1;
8171 while( i-- )
8172 {
8173 if( *fpt ) free( *fpt );
8174 *fpt++ = NULL;
8175 }
8176
8177 for( j=0; j<clus; j++ )
8178 {
8179 // reporterr( "seq[%d] = %s\n", j, seq[j] );
8180 feff = (double)eff[j];
8181
8182 fpt = pat;
8183 *fpt = NULL; // Falign.c kara yobareru tokiha chigau.
8184 spt = seq[j];
8185 gc = 0;
8186 gaplen = 0;
8187
8188 for( i=0; i<len+1; i++ )
8189 // while( i-- )
8190 {
8191 // reporterr( "i=%d, gaplen = %d\n", i, gaplen );
8192 gb = gc;
8193 gc = ( i != len && *spt++ == '-' );
8194 if( gc )
8195 gaplen++;
8196 else
8197 {
8198 if( gb && gaplen )
8199 {
8200 k = 1;
8201 known = 0;
8202 if( *fpt ) for( ; (*fpt)[k].len != -1; k++ )
8203 {
8204 if( (*fpt)[k].len == gaplen )
8205 {
8206 // reporterr( "known\n" );
8207 known = 1;
8208 break;
8209 }
8210 }
8211
8212 if( known == 0 )
8213 {
8214 *fpt = (Gappat *)realloc( *fpt, (k+3) * sizeof( Gappat ) ); // mae1 (total), ato2 (len0), term
8215 if( !*fpt )
8216 {
8217 reporterr( "Cannot allocate gappattern!'n" );
8218 reporterr( "Use an approximate method, with the --mafft5 option.\n" );
8219 exit( 1 );
8220 }
8221 (*fpt)[k].freq = 0.0;
8222 (*fpt)[k].len = gaplen;
8223 (*fpt)[k+1].len = -1;
8224 (*fpt)[k+1].freq = 0.0; // iranai
8225 // reporterr( "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq );
8226 }
8227
8228 // reporterr( "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq );
8229 (*fpt)[k].freq += feff;
8230 // reporterr( "%f\n", (*fpt)[k].freq );
8231 gaplen = 0;
8232 }
8233 }
8234 fpt++;
8235 }
8236 }
8237 #if 1
8238 for( j=0; j<len+1; j++ )
8239 {
8240 if( pat[j] )
8241 {
8242 // reporterr( "j=%d\n", j );
8243 // for( i=1; pat[j][i].len!=-1; i++ )
8244 // reporterr( "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq );
8245
8246 pat[j][0].len = 0; // iminashi
8247 pat[j][0].freq = 0.0;
8248 for( i=1; pat[j][i].len!=-1;i++ )
8249 {
8250 pat[j][0].freq += pat[j][i].freq;
8251 // reporterr( "totaling, i=%d, result = %f\n", i, pat[j][0].freq );
8252 }
8253 // reporterr( "totaled, result = %f\n", pat[j][0].freq );
8254
8255 pat[j][i].freq = 1.0 - pat[j][0].freq;
8256 pat[j][i].len = 0; // imiari
8257 pat[j][i+1].len = -1;
8258 }
8259 else
8260 {
8261 pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) );
8262 pat[j][0].freq = 0.0;
8263 pat[j][0].len = 0; // iminashi
8264
8265 pat[j][1].freq = 1.0 - pat[j][0].freq;
8266 pat[j][1].len = 0; // imiari
8267 pat[j][2].len = -1;
8268 }
8269 }
8270 #endif
8271 }
8272
// Function form of MIN (defined elsewhere in the project) for two ints.
// Being a function, each argument expression written at the call site is
// evaluated exactly once before the comparison.
static int minimum( int i1, int i2 )
{
	int smaller = MIN( i1, i2 );
	return( smaller );
}
8277
// Copies i1/i2 into r1/r2 column by column, jumping over stretches of
// columns in both inputs at once.  The size of each jump is the smaller of
// the two remaining counts (*skip1 - done1) and (*skip2 - done2).
//
//   r1, r2        output buffers, NUL-terminated on return
//   i1, i2        input strings; scanning stops at the NUL of i1
//   skip1, skip2  int arrays read one entry at a time; the cursor of an
//                 array advances whenever a non-gap of its sequence is copied
//
// NOTE(review): presumably skip1/skip2 hold, for each residue, the length
// of the gap run in front of it, so that only columns gapped in BOTH
// sequences are jumped -- confirm against the code that builds them.
static void commongappickpairfast( char *r1, char *r2, char *i1, char *i2, int *skip1, int *skip2 )
{
	int done1 = 0;	// part of the current *skip1 already consumed
	int done2 = 0;	// part of the current *skip2 already consumed
	int jump;

	for( ;; )
	{
		jump = minimum( *skip1 - done1, *skip2 - done2 );
		i1 += jump;
		i2 += jump;
		done1 += jump;
		done2 += jump;

		if( *i1 == '\0' ) break;

		if( *i1 == '-' )
			done1++;
		else
		{
			done1 = 0;
			skip1++;
		}

		if( *i2 == '-' )
			done2++;
		else
		{
			done2 = 0;
			skip2++;
		}

		*r1++ = *i1++;
		*r2++ = *i2++;
	}
	*r1 = '\0';
	*r2 = '\0';
}
8332
// Copies the pair i1/i2 into r1/r2, leaving out every column in which both
// sequences have a gap.
//
//   r1, r2   output buffers, NUL-terminated on return
//   i1, i2   input strings; scanning stops at the NUL of i1, and i2 is read
//            for the same number of columns
static void commongappickpair( char *r1, char *r2, char *i1, char *i2 )
{
	for( ; *i1; i1++, i2++ )
	{
		if( *i1 == '-' && *i2 == '-' ) continue;
		*r1++ = *i1;
		*r2++ = *i2;
	}
	*r1 = '\0';
	*r2 = '\0';
}
8354
8355 double naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
8356 {
8357 // return( 0 );
8358 int i, j;
8359 double val;
8360 double valf;
8361 int pv;
8362 double deff;
8363 char *p1, *p2, *p1p, *p2p;
8364 val = 0.0;
8365 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
8366 {
8367 deff = eff1[i] * eff2[j];
8368 // reporterr( "feff %d-%d = %f\n", i, j, feff );
8369 // reporterr( "i1 = %s\n", seq1[i] );
8370 // reporterr( "i2 = %s\n", seq2[j] );
8371 // reporterr( "s1 = %s\n", s1 );
8372 // reporterr( "s2 = %s\n", s2 );
8373 // reporterr( "penal = %d\n", penal );
8374
8375 valf = 0;
8376 p1 = seq1[i]; p2 = seq2[j];
8377 pv = 0;
8378 if( *p1 == '-' && *p2 != '-' )
8379 pv = penal;
8380 if( *p1 != '-' && *p2 == '-' )
8381 pv = penal;
8382 // if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
8383 p1p = p1; p2p = p2;
8384 valf += (double)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
8385 while( *p1p )
8386 {
8387 pv = 0;
8388 if( *p1p != '-' && *p2p != '-' )
8389 {
8390 if( *p1 == '-' && *p2 != '-' )
8391 pv = penal;
8392 if( *p1 != '-' && *p2 == '-' )
8393 pv = penal;
8394 if( *p1 != '-' && *p2 != '-' )
8395 ;
8396 if( *p1 == '-' && *p2 == '-' )
8397 ;
8398 }
8399 if( *p1p == '-' && *p2p == '-' )
8400 {
8401 if( *p1 == '-' && *p2 != '-' )
8402 pv = penal;
8403 // ;
8404 if( *p1 != '-' && *p2 == '-' )
8405 pv = penal;
8406 // ;
8407 if( *p1 != '-' && *p2 != '-' )
8408 ;
8409 if( *p1 == '-' && *p2 == '-' )
8410 ;
8411 }
8412 if( *p1p != '-' && *p2p == '-' )
8413 {
8414 if( *p1 == '-' && *p2 != '-' )
8415 pv = penal * 2; // ??
8416 // ;
8417 if( *p1 != '-' && *p2 == '-' )
8418 ;
8419 if( *p1 != '-' && *p2 != '-' )
8420 pv = penal;
8421 // ;
8422 if( *p1 == '-' && *p2 == '-' )
8423 pv = penal;
8424 // ;
8425 }
8426 if( *p1p == '-' && *p2p != '-' )
8427 {
8428 if( *p1 == '-' && *p2 != '-' )
8429 ;
8430 if( *p1 != '-' && *p2 == '-' )
8431 pv = penal * 2; // ??
8432 // ;
8433 if( *p1 != '-' && *p2 != '-' )
8434 pv = penal;
8435 // ;
8436 if( *p1 == '-' && *p2 == '-' )
8437 pv = penal;
8438 // ;
8439 }
8440 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
8441 // if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
8442 valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
8443 p1p++; p2p++;
8444 }
8445 // reporterr( "valf = %d\n", valf );
8446 val += deff * ( valf );
8447 }
8448 reporterr( "val = %f\n", val );
8449 return( val );
8450 // exit( 1 );
8451 }
// Currently disabled: returns 0 for every input without reading any of its
// arguments.
//
// The scoring loop that used to follow the early return could never run; it
// is kept below under "#if 0" for reference.  Its penalty rules (prev -> cur):
//   first column: penal if exactly one side is a gap
//   prev both non-gap:        penal if cur has exactly one gap
//   prev both gap:            0
//   prev has exactly one gap: penal if cur is non-gap on both sides,
//                             2 * penal if cur has the gap on the other side,
//                             0 otherwise
double naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
{
	return( 0 );
#if 0
	int a, b;
	int pv;
	int pg1, pg2, cg1, cg2;
	double total, pairsum, weight;
	char *cur1, *cur2, *prev1, *prev2;

	total = 0.0;
	for( a=0; a<n1; a++ )
	{
		for( b=0; b<n2; b++ )
		{
			weight = eff1[a] * eff2[b];
			pairsum = 0;
			cur1 = seq1[a];
			cur2 = seq2[b];

			pv = ( ( *cur1 == '-' ) != ( *cur2 == '-' ) ) ? penal : 0;
			prev1 = cur1;
			prev2 = cur2;
			pairsum += (double)amino_dis[(int)*cur1++][(int)*cur2++] + 0.5 * pv;

			while( *prev1 )
			{
				pg1 = ( *prev1 == '-' );
				pg2 = ( *prev2 == '-' );
				cg1 = ( *cur1 == '-' );
				cg2 = ( *cur2 == '-' );

				if( !pg1 && !pg2 )
					pv = ( cg1 != cg2 ) ? penal : 0;
				else if( pg1 && pg2 )
					pv = 0;
				else if( !cg1 && !cg2 )
					pv = penal;
				else if( cg1 != cg2 && cg1 != pg1 )
					pv = penal * 2; // ??
				else
					pv = 0;

				pairsum += amino_dis[(int)*cur1++][(int)*cur2++] + 0.5 * pv;
				prev1++;
				prev2++;
			}
			total += weight * ( pairsum );
		}
	}
	reporterr( "val = %f\n", total );
	return( total );
#endif
}
8549 double naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
8550 {
8551 int i, j;
8552 double val;
8553 double valf;
8554 int pv;
8555 // double feff = 0.0; // by D.Mathog, a guess
8556 double deff;
8557 char *p1, *p2, *p1p, *p2p;
8558 val = 0.0;
8559 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
8560 {
8561 deff = eff1[i] * eff2[j];
8562 // reporterr( "i1 = %s\n", seq1[i] );
8563 // reporterr( "i2 = %s\n", seq2[j] );
8564 // reporterr( "s1 = %s\n", s1 );
8565 // reporterr( "s2 = %s\n", s2 );
8566 // reporterr( "penal = %d\n", penal );
8567
8568 valf = 0;
8569 p1 = seq1[i]; p2 = seq2[j];
8570 pv = 0;
8571 if( *p1 == '-' && *p2 != '-' )
8572 pv = penal;
8573 if( *p1 != '-' && *p2 == '-' )
8574 pv = penal;
8575 if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) );
8576 p1p = p1; p2p = p2;
8577 valf += (double)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
8578 while( *p1p )
8579 {
8580 pv = 0;
8581 if( *p1p != '-' && *p2p != '-' )
8582 {
8583 if( *p1 == '-' && *p2 != '-' )
8584 pv = penal;
8585 if( *p1 != '-' && *p2 == '-' )
8586 pv = penal;
8587 if( *p1 != '-' && *p2 != '-' )
8588 ;
8589 if( *p1 == '-' && *p2 == '-' )
8590 ;
8591 }
8592 if( *p1p == '-' && *p2p == '-' )
8593 {
8594 if( *p1 == '-' && *p2 != '-' )
8595 // pv = penal;
8596 ;
8597 if( *p1 != '-' && *p2 == '-' )
8598 // pv = penal;
8599 ;
8600 if( *p1 != '-' && *p2 != '-' )
8601 ;
8602 if( *p1 == '-' && *p2 == '-' )
8603 ;
8604 }
8605 if( *p1p != '-' && *p2p == '-' )
8606 {
8607 if( *p1 == '-' && *p2 != '-' )
8608 // pv = penal;
8609 ;
8610 if( *p1 != '-' && *p2 == '-' )
8611 ;
8612 if( *p1 != '-' && *p2 != '-' )
8613 pv = penal;
8614 if( *p1 == '-' && *p2 == '-' )
8615 // pv = penal;
8616 ;
8617 }
8618 if( *p1p == '-' && *p2p != '-' )
8619 {
8620 if( *p1 == '-' && *p2 != '-' )
8621 ;
8622 if( *p1 != '-' && *p2 == '-' )
8623 // pv = penal;
8624 ;
8625 if( *p1 != '-' && *p2 != '-' )
8626 pv = penal;
8627 if( *p1 == '-' && *p2 == '-' )
8628 // pv = penal;
8629 ;
8630 }
8631 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
8632 // if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
8633 valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
8634 p1p++; p2p++;
8635 }
8636 // reporterr( "valf = %d\n", valf );
8637 val += deff * ( valf );
8638 }
8639 reporterr( "val = %f\n", val );
8640 return( val );
8641 // exit( 1 );
8642 }
8643
8644 double naivepairscorefast( char *seq1, char *seq2, int *skip1, int *skip2, int penal )
8645 {
8646 double vali;
8647 int len = strlen( seq1 );
8648 char *s1, *s2;
8649 char *p1, *p2;
8650
8651 s1 = calloc( len+1, sizeof( char ) );
8652 s2 = calloc( len+1, sizeof( char ) );
8653 {
8654 vali = 0.0;
8655 commongappickpairfast( s1, s2, seq1, seq2, skip1, skip2 );
8656 // commongappickpair( s1, s2, seq1, seq2 );
8657 // reporterr( "\n###s1 = %s\n", seq1 );
8658 // reporterr( "###s2 = %s\n", seq2 );
8659 // reporterr( "\n###i1 = %s\n", s1 );
8660 // reporterr( "###i2 = %s\n", s2 );
8661 // reporterr( "allocated size, len+1 = %d\n", len+1 );
8662 // reporterr( "###penal = %d\n", penal );
8663
8664 p1 = s1; p2 = s2;
8665 while( *p1 )
8666 {
8667 if( *p1 == '-' )
8668 {
8669 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
8670 vali += (double)penal;
8671 // while( *p1 == '-' || *p2 == '-' )
8672 while( *p1 == '-' ) // SP
8673 {
8674 p1++;
8675 p2++;
8676 }
8677 continue;
8678 }
8679 if( *p2 == '-' )
8680 {
8681 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
8682 vali += (double)penal;
8683 // while( *p2 == '-' || *p1 == '-' )
8684 while( *p2 == '-' ) // SP
8685 {
8686 p1++;
8687 p2++;
8688 }
8689 continue;
8690 }
8691 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
8692 vali += (double)amino_dis[(int)*p1++][(int)*p2++];
8693 }
8694 }
8695 free( s1 );
8696 free( s2 );
8697 // reporterr( "###vali = %d\n", vali );
8698 return( vali );
8699 }
8700
8701 double naivepairscore11_dynmtx( double **mtx, char *seq1, char *seq2, int penal )
8702 {
8703 double vali;
8704 int len = strlen( seq1 );
8705 char *s1, *s2, *p1, *p2;
8706 int c1, c2;
8707
8708
8709 s1 = calloc( len+1, sizeof( char ) );
8710 s2 = calloc( len+1, sizeof( char ) );
8711 {
8712 vali = 0.0;
8713 commongappickpair( s1, s2, seq1, seq2 );
8714 // reporterr( "###i1 = %s\n", s1 );
8715 // reporterr( "###i2 = %s\n", s2 );
8716 // reporterr( "###penal = %d\n", penal );
8717
8718 p1 = s1; p2 = s2;
8719 while( *p1 )
8720 {
8721 if( *p1 == '-' )
8722 {
8723 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
8724 vali += (double)penal;
8725 // while( *p1 == '-' || *p2 == '-' )
8726 while( *p1 == '-' ) // SP
8727 {
8728 p1++;
8729 p2++;
8730 }
8731 continue;
8732 }
8733 if( *p2 == '-' )
8734 {
8735 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
8736 vali += (double)penal;
8737 // while( *p2 == '-' || *p1 == '-' )
8738 while( *p2 == '-' ) // SP
8739 {
8740 p1++;
8741 p2++;
8742 }
8743 continue;
8744 }
8745 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
8746 c1 = amino_n[(int)*p1++];
8747 c2 = amino_n[(int)*p2++];
8748 vali += (double)mtx[c1][c2];
8749 }
8750 }
8751 free( s1 );
8752 free( s2 );
8753 // reporterr( "###vali = %d\n", vali );
8754 return( vali );
8755 }
8756
8757 double naivepairscore11( char *seq1, char *seq2, int penal )
8758 {
8759 double vali;
8760 int len = strlen( seq1 );
8761 char *s1, *s2, *p1, *p2;
8762
8763 s1 = calloc( len+1, sizeof( char ) );
8764 s2 = calloc( len+1, sizeof( char ) );
8765 {
8766 vali = 0.0;
8767 commongappickpair( s1, s2, seq1, seq2 );
8768 // reporterr( "###i1 = %s\n", s1 );
8769 // reporterr( "###i2 = %s\n", s2 );
8770 // reporterr( "###penal = %d\n", penal );
8771
8772 p1 = s1; p2 = s2;
8773 while( *p1 )
8774 {
8775 if( *p1 == '-' )
8776 {
8777 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
8778 vali += (double)penal;
8779 // while( *p1 == '-' || *p2 == '-' )
8780 while( *p1 == '-' ) // SP
8781 {
8782 p1++;
8783 p2++;
8784 }
8785 continue;
8786 }
8787 if( *p2 == '-' )
8788 {
8789 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
8790 vali += (double)penal;
8791 // while( *p2 == '-' || *p1 == '-' )
8792 while( *p2 == '-' ) // SP
8793 {
8794 p1++;
8795 p2++;
8796 }
8797 continue;
8798 }
8799 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
8800 vali += (double)amino_dis[(int)*p1++][(int)*p2++];
8801 }
8802 }
8803 free( s1 );
8804 free( s2 );
8805 // reporterr( "###vali = %d\n", vali );
8806 return( vali );
8807 }
8808
8809 double naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
8810 {
8811 // return( 0.0 );
8812 int i, j;
8813 double val;
8814 int vali;
8815 double feff;
8816 int len = strlen( seq1[0] );
8817 char *s1, *s2, *p1, *p2;
8818 s1 = calloc( len+1, sizeof( char ) );
8819 s2 = calloc( len+1, sizeof( char ) );
8820 val = 0.0;
8821 for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
8822 {
8823 vali = 0;
8824 feff = eff1[i] * eff2[j];
8825 // reporterr( "feff %d-%d = %f\n", i, j, feff );
8826 commongappickpair( s1, s2, seq1[i], seq2[j] );
8827 // reporterr( "i1 = %s\n", seq1[i] );
8828 // reporterr( "i2 = %s\n", seq2[j] );
8829 // reporterr( "s1 = %s\n", s1 );
8830 // reporterr( "s2 = %s\n", s2 );
8831 // reporterr( "penal = %d\n", penal );
8832
8833 p1 = s1; p2 = s2;
8834 while( *p1 )
8835 {
8836 if( *p1 == '-' )
8837 {
8838 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
8839 vali += penal;
8840 // while( *p1 == '-' || *p2 == '-' )
8841 while( *p1 == '-' ) // SP
8842 {
8843 p1++;
8844 p2++;
8845 }
8846 continue;
8847 }
8848 if( *p2 == '-' )
8849 {
8850 // reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
8851 vali += penal;
8852 // while( *p2 == '-' || *p1 == '-' )
8853 while( *p2 == '-' ) // SP
8854 {
8855 p1++;
8856 p2++;
8857 }
8858 continue;
8859 }
8860 // reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
8861 vali += amino_dis[(int)*p1++][(int)*p2++];
8862 }
8863 // reporterr( "vali = %d\n", vali );
8864 val += feff * vali;
8865 }
8866 free( s1 );
8867 free( s2 );
8868 reporterr( "val = %f\n", val );
8869 return( val );
8870 // exit( 1 );
8871 }
8872
8873 double plainscore( int nseq, char **s )
8874 {
8875 int i, j, ilim;
8876 double v = 0.0;
8877
8878 ilim = nseq-1;
8879 for( i=0; i<ilim; i++ ) for( j=i+1; j<nseq; j++ )
8880 {
8881 v += (double)naivepairscore11( s[i], s[j], penalty );
8882 }
8883
8884 reporterr( "penalty = %d\n", penalty );
8885
8886 return( v );
8887 }
8888
// Appends the -1-terminated int list s2 to the -1-terminated int list s1.
// The terminator of s1 is overwritten and a new -1 is written after the
// copied elements; s1 must have room for them.
void intcat( int *s1, int *s2 )
{
	int *dst = s1;
	int *src;

	while( *dst != -1 ) dst++;
	for( src=s2; *src != -1; src++ ) *dst++ = *src;
	*dst = -1;
}
8899
// Copies the -1-terminated int list s2 into s1, terminator included.
void intcpy( int *s1, int *s2 )
{
	int *src;

	for( src=s2; *src != -1; src++ ) *s1++ = *src;
	*s1 = -1;
}
8909
// Copies exactly n ints from s2 to s1.  No terminator is looked for or written.
void intncpy( int *s1, int *s2, int n )
{
	for( ; n != 0; n-- )
	{
		*s1++ = *s2++;
	}
}
8914
// Copies exactly n doubles from s2 to s1.
void fltncpy( double *s1, double *s2, int n )
{
	for( ; n != 0; n-- )
	{
		*s1++ = *s2++;
	}
}
8919
// Number of elements in front of the -1 terminator of the int list s.
static int countmem( int *s )
{
	int n;

	for( n=0; s[n] != -1; n++ )
		;
	return( n );
}
8926
// Element stored immediately in front of the -1 terminator of the int list s.
// For a list that starts with -1 this reads s[-1], so the list must hold at
// least one element.
static int lastmem( int *s )
{
	int *term = s;

	while( *term != -1 ) term++;
	return( term[-1] );
}
8933
8934
8935 int addonetip( int njobc, int ***topolc, double **lenc, double **iscorec, int ***topol, double **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name, int *alnleninnode, int *nogaplen, int noalign )
8936 {
8937 int i, j, mem0, mem1, posinnew, m;
8938 int nstep;
8939 int norg;
8940 double minscore, minscoreo, eff0, eff1, addedlen, tmpmin;
8941 int nearest, nearesto;
8942 int repnorg;
8943 int *leaf2node;
8944 int *additionaltopol;
8945 // double (*clusterfuncpt[1])(double,double);
8946 Bchain *ac, *acpt, *acori, *acnext, *acprev;
8947 int neighbor;
8948 char *neighborlist;
8949 char *npt;
8950 int reflen, nearestnode, nogaplentoadd;
8951 int *topoldum0 = NULL;
8952 int *topoldum1 = NULL;
8953 int *topolo0;
8954 int *topolo1;
8955 int seqlengthcondition;
8956 double sueff1_double_local = 1.0 - sueff_global;
8957 double sueff05_double_local = sueff_global * 0.5;
8958 // char **tree; //static?
8959 // char *treetmp; //static?
8960
8961 // for( i=0; i<njobc; i++ ) reporterr( "nogaplen of %d = %d\n", i+1, nogaplen[i] );
8962 //exit( 1 );
8963
8964
8965 // treetmp = AllocateCharVec( njob*150 );
8966 // tree = AllocateCharMtx( njob, njob*150 );
8967
8968 // sueff1_double = 1.0 - sueff_global;
8969 // sueff05_double = sueff_global * 0.5;
8970 // if ( treemethod == 'X' )
8971 // clusterfuncpt[0] = cluster_mix_double;
8972 // else if ( treemethod == 'E' )
8973 // clusterfuncpt[0] = cluster_average_double;
8974 // else if ( treemethod == 'q' )
8975 // clusterfuncpt[0] = cluster_minimum_double;
8976 // else
8977 // {
8978 // reporterr( "Unknown treemethod, %c\n", treemethod );
8979 // exit( 1 );
8980 // }
8981
8982 norg = njobc-1;
8983 nstep = njobc-2;
8984
8985 additionaltopol = (int *)calloc( 2, sizeof( int ) );
8986 leaf2node= (int *)calloc( norg, sizeof( int ) );
8987 if( treeout )
8988 {
8989 neighborlist = calloc( norg * 30, sizeof( char ) );
8990 }
8991 // for( i=0; i<njobc; i++ ) sprintf( tree[i], "%d", i+1 );
8992 if( !leaf2node )
8993 {
8994 reporterr( "Cannot allocate leaf2node.\n" );
8995 exit( 1 );
8996 }
8997 additionaltopol[0] = norg;
8998 additionaltopol[1] = -1;
8999
9000 ac = (Bchain *)malloc( norg * sizeof( Bchain ) );
9001 for( i=0; i<norg; i++ )
9002 {
9003 ac[i].next = ac+i+1;
9004 ac[i].prev = ac+i-1;
9005 ac[i].pos = i;
9006 }
9007 ac[norg-1].next = NULL;
9008
9009
9010 acori = (Bchain *)malloc( 1 * sizeof( Bchain ) );
9011 acori->next = ac;
9012 acori->pos = -1;
9013 ac[0].prev = acori;
9014
9015
9016 // for( i=0; i<nstep; i++ )
9017 // {
9018 // reporterr( "distfromtip = %f\n", dep[i].distfromtip );
9019 // }
9020 //
9021 // for( i=0; i<norg; i++ )
9022 // {
9023 // reporterr( "disttofrag(%d,%d) = %f\n", i, njobc-1, iscorec[i][norg-i] );
9024 // }
9025
9026
9027 minscore = 9999.9;
9028 nearest = -1;
9029 for( i=0; i<norg; i++ )
9030 {
9031 tmpmin = iscorec[i][norg-i];
9032 if( minscore > tmpmin )
9033 {
9034 minscore = tmpmin;
9035 nearest = i;
9036 }
9037 }
9038 nearesto = nearest;
9039 minscoreo = minscore;
9040
9041
9042
9043 // for( i=0; i<njobc-1; i++ ) for( j=i+1; j<njobc; j++ )
9044 // reporterr( "iscorec[%d][%d] = %f\n", i, j, iscorec[i][j-i] );
9045 // reporterr( "nearest = %d\n", nearest+1 );
9046 // reporterr( "nearesto = %d\n", nearesto+1 );
9047
9048 posinnew = 0;
9049 repnorg = -1;
9050 nogaplentoadd = nogaplen[norg];
9051
9052
9053
9054 for( i=0; i<norg; i++ ) leaf2node[i] = -1;
9055 for( i=0; i<nstep; i++ )
9056 {
9057 mem0 = topol[i][0][0];
9058 mem1 = topol[i][1][0];
9059 #if 0
9060 reporterr( "\n\nstep %d (old) \n", i );
9061
9062 reporterr( "group0 = \n" );
9063 for( j=0; topol[i][0][j]>-1; j++ )
9064 {
9065 reporterr( "%d ", topol[i][0][j]+1 );
9066 }
9067 reporterr( "\n" );
9068 reporterr( "len=%f\n", len[i][0] );
9069 reporterr( "group1 = \n" );
9070 for( j=0; topol[i][1][j]>-1; j++ )
9071 {
9072 reporterr( "%d ", topol[i][1][j]+1 );
9073 }
9074 reporterr( "\n" );
9075 reporterr( "len=%f\n", len[i][1] );
9076
9077 reporterr( "\n\n\nminscore = %f ? %f\n", minscore, dep[i].distfromtip*2 );
9078 reporterr( "i = %d\n", i );
9079 if( leaf2node[nearest] == -1 )
9080 {
9081 reporterr( "nogaplen[nearest] = %d\n", nogaplen[nearest] );
9082 }
9083 else
9084 {
9085 reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] );
9086 reporterr( "leaf2node[nearest] = %d\n", leaf2node[nearest] );
9087 }
9088 #endif
9089 nearestnode = leaf2node[nearest];
9090 if( nearestnode == -1 )
9091 reflen = nogaplen[nearest];
9092 else
9093 reflen = alnleninnode[nearestnode];
9094 // reflen = alnleninnode[i]; // BUG!!
9095
9096 if( noalign ) seqlengthcondition = 1;
9097 else seqlengthcondition = ( nogaplentoadd <= reflen );
9098
9099 //seqlengthcondition = 1; // CHUUI
9100 //seqlengthcondition = ( nogaplentoadd <= reflen ); // CHUUI
9101
9102 if( repnorg == -1 && dep[i].distfromtip * 2 > minscore && seqlengthcondition ) // Keitouteki ichi ha fuseikaku.
9103 // if( repnorg == -1 && dep[i].distfromtip * 2 > minscore ) // Keitouteki ichi dake ga hitsuyouna baaiha kore wo tsukau.
9104 {
9105 // reporterr( "INSERT HERE, %d-%d\n", nearest, norg );
9106 // reporterr( "nearest = %d\n", nearest );
9107 // reporterr( "\n\n\nminscore = %f\n", minscore );
9108 // reporterr( "distfromtip *2 = %f\n", dep[i].distfromtip * 2 );
9109 // reporterr( "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] );
9110
9111 if( nearestnode == -1 )
9112 {
9113 // reporterr( "INSERTING to 0!!!\n" );
9114 // reporterr( "lastlength = %d\n", nogaplen[norg] );
9115 // reporterr( "reflength = %d\n", nogaplen[nearest] );
9116 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) );
9117 topolc[posinnew][0][0] = nearest;
9118 topolc[posinnew][0][1] = -1;
9119
9120 addedlen = lenc[posinnew][0] = minscore / 2;
9121
9122 }
9123 else
9124 {
9125 // reporterr( "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) );
9126 // reporterr( "alnleninnode[i] = %d\n", alnleninnode[i] );
9127 // reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] );
9128
9129 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[nearestnode][0] ) + countmem( topol[nearestnode][1] ) + 1 ) * sizeof( int ) ) );
9130 // reporterr( "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] );
9131 intcpy( topolc[posinnew][0], topol[nearestnode][0] );
9132 intcat( topolc[posinnew][0], topol[nearestnode][1] );
9133 // addedlen = lenc[posinnew][0] = minscore / 2 - len[nearestnode][0]; // bug!!
9134 addedlen = lenc[posinnew][0] = dep[i].distfromtip - minscore / 2; // 2014/06/10
9135 // fprintf( stderr, "addedlen = %f, dep[i].distfromtip = %f, len[nearestnode][0] = %f, minscore/2 = %f, lenc[posinnew][0] = %f\n", addedlen, dep[i].distfromtip, len[nearestnode][0], minscore/2, lenc[posinnew][0] );
9136
9137 }
9138 neighbor = lastmem( topolc[posinnew][0] );
9139
9140 if( treeout )
9141 {
9142 #if 0
9143 fp = fopen( "infile.tree", "a" ); // kyougou!!
9144 if( fp == 0 )
9145 {
9146 reporterr( "File error!\n" );
9147 exit( 1 );
9148 }
9149 fprintf( fp, "\n" );
9150 fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] );
9151 fprintf( fp, " nearest sequence: %d\n", nearest + 1 );
9152 fprintf( fp, " distance: %f\n", minscore );
9153 fprintf( fp, " cousin: " );
9154 for( j=0; topolc[posinnew][0][j]!=-1; j++ )
9155 fprintf( fp, "%d ", topolc[posinnew][0][j]+1 );
9156 fprintf( fp, "\n" );
9157 fclose( fp );
9158 #else
9159 addtree[iadd].nearest = nearesto;
9160 addtree[iadd].dist1 = minscoreo;
9161 addtree[iadd].dist2 = minscore;
9162 neighborlist[0] = 0;
9163 npt = neighborlist;
9164 for( j=0; topolc[posinnew][0][j]!=-1; j++ )
9165 {
9166 sprintf( npt, "%d ", topolc[posinnew][0][j]+1 );
9167 npt += strlen( npt );
9168 }
9169 addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) );
9170 strcpy( addtree[iadd].neighbors, neighborlist );
9171 #endif
9172 }
9173
9174 // reporterr( "INSERTING to 1!!!\n" );
9175 topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( 1 + 1 ) * sizeof( int ) );
9176 topolc[posinnew][1][0] = norg;
9177 topolc[posinnew][1][1] = -1;
9178 lenc[posinnew][1] = minscore / 2;
9179
9180 // reporterr( "STEP %d (newnew)\n", posinnew );
9181 // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 );
9182 // reporterr( "\n len=%f\n", lenc[posinnew][0] );
9183 // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 );
9184 // reporterr( "\n len=%f\n", lenc[posinnew][1] );
9185
9186 repnorg = nearest;
9187
9188 // reporterr( "STEP %d\n", posinnew );
9189 // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] );
9190 // reporterr( "\n len=%f\n", lenc[i][0] );
9191 // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] );
9192 // reporterr( "\n len=%f\n", lenc[i][1] );
9193
9194 // im = topolc[posinnew][0][0];
9195 // jm = topolc[posinnew][1][0];
9196 // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] );
9197 // strcpy( tree[im], treetmp );
9198
9199 posinnew++;
9200 }
9201
9202 // reporterr( "minscore = %f\n", minscore );
9203 // reporterr( "distfromtip = %f\n", dep[i].distfromtip );
9204 // reporterr( "Modify matrix, %d-%d\n", nearest, norg );
9205 eff0 = iscorec[mem0][norg-mem0];
9206 eff1 = iscorec[mem1][norg-mem1];
9207
9208 // iscorec[mem0][norg-mem0] = (clusterfuncpt[0])( eff0, eff1 );
9209 iscorec[mem0][norg-mem0] = MIN( eff0, eff1 ) * sueff1_double_local + ( eff0 + eff1 ) * sueff05_double_local;
9210 iscorec[mem1][norg-mem1] = 9999.9; // sukoshi muda
9211
9212 acprev = ac[mem1].prev;
9213 acnext = ac[mem1].next;
9214 acprev->next = acnext;
9215 if( acnext != NULL ) acnext->prev = acprev;
9216
9217 if( ( nearest == mem1 || nearest == mem0 ) )
9218 {
9219 minscore = 9999.9;
9220 // for( j=0; j<norg; j++ ) // sukoshi muda
9221 // {
9222 // if( minscore > iscorec[j][norg-j] )
9223 // {
9224 // minscore = iscorec[j][norg-j];
9225 // nearest = j;
9226 // }
9227 // }
9228 // reporterr( "searching on modified ac " );
9229 for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // sukoshi muda
9230 {
9231 // reporterr( "." );
9232 j = acpt->pos;
9233 tmpmin = iscorec[j][norg-j];
9234 if( minscore > tmpmin )
9235 {
9236 minscore = tmpmin;
9237 nearest = j;
9238 }
9239 }
9240 // reporterr( "done\n" );
9241 }
9242
9243 // reporterr( "posinnew = %d\n", posinnew );
9244
9245
9246 if( topol[i][0][0] == repnorg )
9247 {
9248 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 2 ) * sizeof( int ) );
9249 intcpy( topolc[posinnew][0], topol[i][0] );
9250 intcat( topolc[posinnew][0], additionaltopol );
9251 lenc[posinnew][0] = len[i][0] - addedlen; // 2014/6/10
9252 // fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip );
9253 // fprintf( stderr, "addedlen=%f, len[i][0]=%f, lenc[][0]=%f\n", addedlen, len[i][0], lenc[posinnew][0] );
9254 // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][0] );
9255 addedlen = 0.0;
9256 }
9257 else
9258 {
9259 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 1 ) * sizeof( int ) );
9260 intcpy( topolc[posinnew][0], topol[i][0] );
9261 lenc[posinnew][0] = len[i][0];
9262 }
9263
9264 if( topol[i][1][0] == repnorg )
9265 {
9266 topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 2 ) * sizeof( int ) );
9267 intcpy( topolc[posinnew][1], topol[i][1] );
9268 intcat( topolc[posinnew][1], additionaltopol );
9269 lenc[posinnew][1] = len[i][1] - addedlen; // 2014/6/10
9270 // fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip );
9271 // fprintf( stderr, "addedlen=%f, len[i][1]=%f, lenc[][1]=%f\n", addedlen, len[i][1], lenc[posinnew][1] );
9272 // fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][1] );
9273 addedlen = 0.0;
9274
9275 repnorg = topolc[posinnew][0][0]; // juuyou
9276 }
9277 else
9278 {
9279 topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 1 ) * sizeof( int ) );
9280 intcpy( topolc[posinnew][1], topol[i][1] );
9281 lenc[posinnew][1] = len[i][1];
9282 }
9283
9284 // reporterr( "\nSTEP %d (new)\n", posinnew );
9285 // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 );
9286 // reporterr( "\n len=%f\n", lenc[posinnew][0] );
9287 // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 );
9288 // reporterr( "\n len=%f\n", lenc[posinnew][1] );
9289
9290 // reporterr("\ni=%d\n####### leaf2node[nearest]= %d\n", i, leaf2node[nearest] );
9291
9292 for( j=0; (m=topol[i][0][j])!=-1; j++ ) leaf2node[m] = i;
9293 for( j=0; (m=topol[i][1][j])!=-1; j++ ) leaf2node[m] = i;
9294
9295 // reporterr("####### leaf2node[nearest]= %d\n", leaf2node[nearest] );
9296
9297 // im = topolc[posinnew][0][0];
9298 // jm = topolc[posinnew][1][0];
9299 // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] );
9300 // strcpy( tree[im], treetmp );
9301 //
9302 // reporterr( "%s\n", treetmp );
9303
9304 posinnew++;
9305 }
9306
9307 if( nstep )
9308 {
9309 i--;
9310 topolo0 = topol[i][0];
9311 topolo1 = topol[i][1];
9312 }
9313 else
9314 {
9315 // i = 0;
9316 // free( topol[i][0] );//?
9317 // free( topol[i][1] );//?
9318 // topol[i][0] = calloc( 2, sizeof( int ) );
9319 // topol[i][1] = calloc( 1, sizeof( int ) );
9320 // topol[i][0][0] = 0;
9321 // topol[i][0][1] = -1;
9322 // topol[i][1][0] = -1;
9323
9324 topoldum0 = calloc( 2, sizeof( int ) );
9325 topoldum1 = calloc( 1, sizeof( int ) );
9326 topoldum0[0] = 0;
9327 topoldum0[1] = -1;
9328 topoldum1[0] = -1;
9329
9330 topolo0 = topoldum0;
9331 topolo1 = topoldum1;
9332 }
9333 if( repnorg == -1 )
9334 {
9335 // topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + countmem( topol[i][1] ) + 1 ) * sizeof( int ) );
9336 // intcpy( topolc[posinnew][0], topol[i][0] );
9337 // intcat( topolc[posinnew][0], topol[i][1] );
9338 topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topolo0 ) + countmem( topolo1 ) + 1 ) * sizeof( int ) );
9339 intcpy( topolc[posinnew][0], topolo0 );
9340 intcat( topolc[posinnew][0], topolo1 );
9341 // lenc[posinnew][0] = len[i][0] + len[i][1] - minscore / 2; // BUG!! 2014/06/07 ni hakken
9342 if( nstep )
9343 lenc[posinnew][0] = minscore / 2 - dep[nstep-1].distfromtip; // only when nstep>0, 2014/11/21
9344 else
9345 lenc[posinnew][0] = minscore / 2;
9346
9347 // reporterr( "\ndep[nstep-1].distfromtip = %f\n", dep[nstep-1].distfromtip );
9348 // reporterr( "lenc[][0] = %f\n", lenc[posinnew][0] );
9349
9350 topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) );
9351 intcpy( topolc[posinnew][1], additionaltopol );
9352 lenc[posinnew][1] = minscore / 2;
9353
9354 // neighbor = lastmem( topolc[posinnew][0] );
9355 neighbor = norg-1; // hakkirishita neighbor ga inai baai saigo ni hyouji
9356
9357 if( treeout )
9358 {
9359 #if 0
9360 fp = fopen( "infile.tree", "a" ); // kyougou!!
9361 if( fp == 0 )
9362 {
9363 reporterr( "File error!\n" );
9364 exit( 1 );
9365 }
9366 fprintf( fp, "\n" );
9367 fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] );
9368 fprintf( fp, " nearest sequence: %d\n", nearest + 1 );
9369 fprintf( fp, " cousin: " );
9370 for( j=0; topolc[posinnew][0][j]!=-1; j++ )
9371 fprintf( fp, "%d ", topolc[posinnew][0][j]+1 );
9372 fprintf( fp, "\n" );
9373 fclose( fp );
9374 #else
9375 addtree[iadd].nearest = nearesto;
9376 addtree[iadd].dist1 = minscoreo;
9377 addtree[iadd].dist2 = minscore;
9378 neighborlist[0] = 0;
9379 npt = neighborlist;
9380 for( j=0; topolc[posinnew][0][j]!=-1; j++ )
9381 {
9382 sprintf( npt, "%d ", topolc[posinnew][0][j]+1 );
9383 npt += strlen( npt );
9384 }
9385 addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) );
9386 strcpy( addtree[iadd].neighbors, neighborlist );
9387 #endif
9388 }
9389
9390 // reporterr( "STEP %d\n", posinnew );
9391 // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] );
9392 // reporterr( "\n len=%f", lenc[posinnew][0] );
9393 // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] );
9394 // reporterr( "\n len=%f\n", lenc[posinnew][1] );
9395 }
9396
9397 if( topoldum0 ) free( topoldum0 );
9398 if( topoldum1 ) free( topoldum1 );
9399 free( leaf2node );
9400 free( additionaltopol );
9401 free( ac );
9402 free( acori );
9403 if( treeout ) free( neighborlist );
9404
9405 #if 0 // create a newick tree for CHECK
9406 char **tree;
9407 char *treetmp;
9408 int im, jm;
9409
9410 treetmp = AllocateCharVec( njob*150 );
9411 tree = AllocateCharMtx( njob, njob*150 );
9412 for( i=0; i<njobc; i++ ) sprintf( tree[i], "%d", i+1 );
9413
9414 for( i=0; i<njobc-1; i++ )
9415 {
9416 reporterr( "\nSTEP %d\n", i );
9417 for( j=0; topolc[i][0][j]!=-1; j++ ) reporterr( " %d", topolc[i][0][j] );
9418 reporterr( "\n len=%f\n", lenc[i][0] );
9419 for( j=0; topolc[i][1][j]!=-1; j++ ) reporterr( " %d", topolc[i][1][j] );
9420 reporterr( "\n len=%f\n", lenc[i][1] );
9421
9422 im = topolc[i][0][0];
9423 jm = topolc[i][1][0];
9424 sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[i][0], tree[jm], lenc[i][1] );
9425 strcpy( tree[im], treetmp );
9426
9427 }
9428
9429 reporterr( "%s\n", treetmp );
9430 FreeCharMtx( tree );
9431 free( treetmp );
9432 #endif
9433
9434 return( neighbor );
9435 }
9436
9437 #if 0
9438 int samemember( int *mem, int *cand )
9439 {
9440 int i, j;
9441
9442 #if 0
9443 reporterr( "mem = " );
9444 for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] );
9445 reporterr( "\n" );
9446
9447 reporterr( "cand = " );
9448 for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] );
9449 reporterr( "\n" );
9450 #endif
9451
9452 for( i=0, j=0; mem[i]>-1; )
9453 {
9454 if( mem[i++] != cand[j++] ) return( 0 );
9455 }
9456
9457 if( cand[j] == -1 )
9458 {
9459 return( 1 );
9460 }
9461 else
9462 {
9463 return( 0 );
9464 }
9465 }
9466 #else
/*
 * samemember - order-insensitive comparison of two member lists.
 *
 * mem, cand: int arrays; scanning stops at the first element that is
 *            not greater than -1.
 *
 * Returns 1 when both lists hold the same number of entries and every
 * entry of mem also occurs somewhere in cand; returns 0 otherwise.
 */
int samemember( int *mem, int *cand )
{
	int *mp, *cp;
	int nmem = 0, ncand = 0;

	for( mp=mem; *mp>-1; mp++ ) nmem++;
	for( cp=cand; *cp>-1; cp++ ) ncand++;

	// Lists of different size can never match.
	if( nmem != ncand ) return( 0 );

	for( mp=mem; *mp>-1; mp++ )
	{
		// Advance through cand until *mp is found or cand is exhausted.
		for( cp=cand; *cp>-1 && *cp!=*mp; cp++ )
			;
		if( *cp == -1 ) return( 0 );
	}

	// Match only if the scan of mem stopped on a proper -1 terminator.
	return( *mp == -1 );
}
9502 #endif
9503
/*
 * samemembern - order-insensitive comparison of a terminated member
 * list against a counted candidate array.
 *
 * mem:  int array; scanning stops at the first element not greater than -1.
 * cand: int array of which exactly nc entries are inspected.
 * nc:   number of entries in cand.
 *
 * Returns 1 when mem holds exactly nc entries and each of them occurs
 * in cand[0..nc-1]; returns 0 otherwise.
 */
int samemembern( int *mem, int *cand, int nc )
{
	int *mp;
	int k;
	int nm = 0;

	// Count the members, bailing out as soon as mem is known to be longer.
	while( mem[nm] > -1 )
	{
		if( ++nm > nc ) return( 0 );
	}
	if( nm != nc ) return( 0 );

	for( mp=mem; *mp>-1; mp++ )
	{
		k = 0;
		while( k < nc && cand[k] != *mp ) k++;
		if( k == nc ) return( 0 ); // *mp is missing from cand
	}

	// Match only if the scan of mem stopped on a proper -1 terminator.
	return( *mp == -1 );
}
9543
9544
/*
 * includemember - test whether every entry of mem occurs in cand.
 *
 * mem, cand: int arrays; scanning stops at the first element that is
 *            not greater than -1.
 *
 * Returns 1 when mem is contained in cand (an empty mem is always
 * contained), 0 as soon as one entry of mem is not found.
 */
int includemember( int *mem, int *cand ) // mem in cand
{
	int *mp, *cp;

	for( mp=mem; *mp>-1; mp++ )
	{
		cp = cand;
		while( *cp > -1 && *cp != *mp ) cp++;
		if( *cp == -1 ) return( 0 ); // ran off the end of cand
	}
	return( 1 );
}
9568
/*
 * overlapmember - test whether two member lists share at least one entry.
 *
 * mem1, mem2: int arrays; scanning stops at the first element that is
 *             not greater than -1.
 *
 * Returns 1 on the first common entry found, 0 when there is none.
 */
int overlapmember( int *mem1, int *mem2 )
{
	int *p, *q;

	for( p=mem1; *p>-1; p++ )
	{
		q = mem2;
		while( *q > -1 )
		{
			if( *q == *p ) return( 1 );
			q++;
		}
	}
	return( 0 );
}
/*
 * gapcount - weighted gap count per alignment column.
 *
 * freq: output, lgth entries; freq[c] receives the sum of eff[s] over
 *       all sequences s whose character in column c is '-'.
 * seq:  nseq character rows, each read at positions 0..lgth-1.
 * nseq: number of rows.
 * eff:  per-row weights, nseq entries.
 * lgth: number of columns to process.
 */
void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth )
{
	int col, row;
	double weighted;

	for( col=0; col<lgth; col++ )
	{
		weighted = 0.0;
		for( row=0; row<nseq; row++ )
			if( seq[row][col] == '-' ) weighted += eff[row];
		freq[col] = weighted;
	}
}
9599
/*
 * gapcountf - for every column, accumulate the weights of the rows that
 * carry a gap character there.
 *
 * freq: output array of lgth entries; entry c is set to the sum of
 *       eff[r] for each row r with seq[r][c] == '-'.
 * seq:  nseq rows of characters, indexed up to lgth-1.
 * nseq: row count.
 * eff:  weight of each row (nseq entries).
 * lgth: column count.
 */
void gapcountf( double *freq, char **seq, int nseq, double *eff, int lgth )
{
	int pos = 0;

	while( pos < lgth )
	{
		double acc = 0.0;
		int k = 0;

		while( k < nseq )
		{
			if( seq[k][pos] == '-' )
				acc += eff[k];
			k++;
		}
		freq[pos++] = acc;
	}
	return;
}
9621
/*
 * outgapcount - weighted gap count for a single gap pattern.
 *
 * freq:   output; *freq receives the sum of eff[k] over all k with
 *         gappat[k] == '-'.
 * nseq:   number of entries examined in gappat and eff.
 * gappat: one character per sequence.
 * eff:    one weight per sequence.
 */
void outgapcount( double *freq, int nseq, char *gappat, double *eff )
{
	double total = 0.0;
	int k;

	for( k=0; k<nseq; k++ )
		if( gappat[k] == '-' ) total += eff[k];

	*freq = total;
}
9635
9636 double dist2offset( double dist )
9637 {
9638 double val = dist * 0.5 - specificityconsideration; // dist ha 0..2 dakara
9639 // double val = dist * 1.0 - specificityconsideration; // dist ha 0..2 dakara
9640 if( val > 0.0 ) val = 0.0;
9641 return val;
9642 }
9643
9644 void makedynamicmtx( double **out, double **in, double offset )
9645 {
9646 int i, j, ii, jj;
9647 double av;
9648
9649 offset = dist2offset( offset * 2.0 ); // offset 0..1 -> 0..2
9650
9651 // if( offset > 0.0 ) offset = 0.0;
9652 // reporterr( "dynamic offset = %f\n", offset );
9653
9654 for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
9655 {
9656 out[i][j] = in[i][j];
9657 }
9658 if( offset == 0.0 ) return;
9659
9660 for( i=0; i<nalphabets; i++ )
9661 {
9662 ii = (int)amino[i];
9663 if( ii == '-' ) continue; // text no toki arieru
9664 for( j=0; j<nalphabets; j++ )
9665 {
9666 jj = (int)amino[j];
9667 if( jj == '-' ) continue; // text no toki arieru
9668 out[i][j] = in[i][j] + offset * 600;
9669 // reporterr( "%c-%c: %f\n", ii, jj, out[i][j] );
9670 }
9671 }
9672
9673 // reporterr( "offset = %f\n", offset );
9674 // reporterr( "out[W][W] = %f\n", out[amino_n['W']][amino_n['W']] );
9675 // reporterr( "out[A][A] = %f\n", out[amino_n['A']][amino_n['A']] );
9676
9677
9678 return;
9679
9680 // Taikaku youso no heikin ga 600 ni naruyouni re-scale.
9681 // Hitaikaku youso ga ookiku narisugi.
9682
9683 av = 0.0;
9684 for( i=0; i<nalphabets; i++ )
9685 {
9686 if( ii == '-' ) continue; // text no toki arieru
9687 av += out[i][i];
9688 }
9689 av /= (double)nalphabets;
9690
9691 for( i=0; i<nalphabets; i++ )
9692 {
9693 if( amino[i] == '-' ) continue; // text no toki arieru
9694 for( j=0; j<nalphabets; j++ )
9695 {
9696 if( amino[j] == '-' ) continue; // text no toki arieru
9697 out[i][j] = out[i][j] * 600 / av;
9698 reporterr( "%c-%c: %f\n", amino[i], amino[j], out[i][j] );
9699 }
9700 }
9701 }
9702 void FreeCommonIP()
9703 {
9704 if( commonIP ) FreeIntMtx( commonIP );
9705 commonIP = NULL;
9706 commonAlloc1 = 0;
9707 commonAlloc2 = 0;
9708 }
9709
/*
 * makeskiptable - build a per-sequence table of gap-run lengths.
 *
 * n:    number of sequences.
 * skip: array of n pointers; skip[i] is overwritten (the previous value
 *       is not freed) with a newly calloc'ed, zero-filled int array of
 *       nogaplen+1 entries, where nogaplen is the length of seq[i] after
 *       gappick0().  Ownership of skip[i] passes to the caller.
 * seq:  n NUL-terminated sequences.
 *
 * For each sequence, skip[i][k] ends up holding the number of '-'
 * characters seen after k non-'-' characters and before the next one,
 * so entry 0 counts leading gaps and the last entry counts trailing gaps.
 * NOTE(review): this indexing stays in bounds only if gappick0() drops
 * exactly the '-' characters -- confirm against gappick0().
 *
 * Allocation failure is reported via reporterr() and terminates the
 * program, as elsewhere in this file.
 */
void makeskiptable( int n, int **skip, char **seq )
{
	char *nogapseq;
	int nogaplen, alnlen, maxlen;
	int i, j, posinseq;

	// Size the scratch buffer for the longest row rather than for seq[0]
	// only, so that gappick0() cannot write past its end.
	maxlen = 0;
	for( i=0; i<n; i++ )
	{
		alnlen = strlen( seq[i] );
		if( alnlen > maxlen ) maxlen = alnlen;
	}

	nogapseq = calloc( maxlen+1, sizeof( char ) );
	if( !nogapseq )
	{
		reporterr( "Cannot allocate nogapseq.\n" );
		exit( 1 );
	}

	for( i=0; i<n; i++ )
	{
		gappick0( nogapseq, seq[i] );
		nogaplen = strlen( nogapseq );
		alnlen = strlen( seq[i] );
		skip[i] = calloc( nogaplen+1, sizeof( int ) );
		if( !skip[i] )
		{
			reporterr( "Cannot allocate skip[%d].\n", i );
			exit( 1 );
		}

		// posinseq = number of non-gap characters consumed so far.
		posinseq = 0;
		for( j=0; j<alnlen; j++ )
		{
			if( seq[i][j] == '-' )
			{
				skip[i][posinseq]++;
			}
			else
			{
				posinseq++;
			}
		}
	}
	free( nogapseq );
}
9746
/*
 * Append a copy of the -1-terminated list `members` to the NULL-terminated
 * table *tablept, then update *nsubpt (row count) and *maxmempt (largest
 * row size seen).  Terminates the program if memory cannot be obtained.
 */
static void appendsubalignmentrow( int ***tablept, int *nsubpt, int *maxmempt, int *members )
{
	int nmem;
	int **grown;

	for( nmem=0; members[nmem]!=-1; nmem++ )
		;

	// One slot for the new row plus one for the NULL terminator.
	// Keep the old pointer until realloc is known to have succeeded.
	grown = realloc( *tablept, sizeof( int * ) * (*nsubpt+2) );
	if( !grown )
	{
		reporterr( "Cannot allocate subalignment table.\n" );
		exit( 1 );
	}
	*tablept = grown;

	grown[*nsubpt] = calloc( nmem+1, sizeof( int ) );
	if( !grown[*nsubpt] )
	{
		reporterr( "Cannot allocate subalignment table row.\n" );
		exit( 1 );
	}
	grown[*nsubpt+1] = NULL;
	intcpy( grown[*nsubpt], members );

	if( *maxmempt < nmem ) *maxmempt = nmem;
	*nsubpt += 1;
}

/*
 * generatesubalignmentstable - collect the clusters whose branch crosses
 * a given height threshold.
 *
 * nseq:      number of sequences; steps 0..nseq-2 of topol/len are read.
 * tablept:   in/out; *tablept is grown with realloc() and receives one
 *            newly allocated, -1-terminated member list per selected
 *            cluster, followed by a NULL entry.
 *            NOTE(review): *tablept is presumably NULL or a heap pointer
 *            on entry, since it is handed to realloc() -- confirm at the
 *            call sites.
 * nsubpt:    out; set to the number of member lists stored.
 * maxmempt:  out; set to the size of the largest stored member list.
 * topol:     topol[i][0] and topol[i][1] are the -1-terminated member
 *            lists of the two groups handled in step i.
 * len:       len[i][0] and len[i][1] are the branch lengths added in
 *            step i to the respective group.
 * threshold: height at which clusters are cut.
 *
 * A running height is kept per representative (first member of a group).
 * A group with more than one member is stored when its height before the
 * step is <= threshold and its height after adding the branch length is
 * > threshold.
 *
 * Returns 1 if the final height recorded for member 0 is <= threshold,
 * 0 otherwise.  Allocation failure is reported via reporterr() and
 * terminates the program.
 */
int generatesubalignmentstable( int nseq, int ***tablept, int *nsubpt, int *maxmempt, int ***topol, double **len, double threshold )
{
	int i, rep0, rep1, belowthreshold;
	double distfromtip0, distfromtip1;
	double *distfromtip;
	reporterr( "\n\n\n" );

	*maxmempt = 0;
	*nsubpt = 0;

	distfromtip = calloc( nseq, sizeof( double ) );
	if( !distfromtip )
	{
		reporterr( "Cannot allocate distfromtip.\n" );
		exit( 1 );
	}

	for( i=0; i<nseq-1; i++ )
	{
		// Remember the height before this step, then add the new branch.
		rep0 = topol[i][0][0];
		distfromtip0 = distfromtip[rep0];
		distfromtip[rep0] += len[i][0];

		rep1 = topol[i][1][0];
		distfromtip1 = distfromtip[rep1];
		distfromtip[rep1] += len[i][1];

		// topol[i][x][1] != -1 : the group has at least two members.
		if( topol[i][0][1] != -1 && distfromtip0 <= threshold && threshold < distfromtip[rep0] )
			appendsubalignmentrow( tablept, nsubpt, maxmempt, topol[i][0] );

		if( topol[i][1][1] != -1 && distfromtip1 <= threshold && threshold < distfromtip[rep1] )
			appendsubalignmentrow( tablept, nsubpt, maxmempt, topol[i][1] );
	}

	belowthreshold = ( distfromtip[0] <= threshold );
	free( distfromtip );
	return( belowthreshold );
}
9825
9826
9827
9828 double sumofpairsscore( int nseq, char **seq )
9829 {
9830 double v = 0;
9831 int i, j;
9832 for( i=1; i<nseq; i++ )
9833 {
9834 for( j=0; j<i; j++ )
9835 {
9836 v += naivepairscore11( seq[i], seq[j], penalty ) / 600;
9837 }
9838 }
9839 // v /= ( (nseq-1) * nseq ) / 2;
9840 return( v );
9841 }
9842