1 #include "mltaln.h"
2
3 #define SMALLMEMORY 1
4
5 #define DEBUG 0
6 #define IODEBUG 0
7 #define SCOREOUT 0
8
9 static int nadd;
10 static int treein;
11 static int topin;
12 static int treeout;
13 static int distout;
14 static int noalign;
15 static int multidist;
16 static int maxdist = 2; // scale -> 2bai
17 static int allowlongadds;
18 static int keeplength;
19 static int ndeleted;
20 static int mapout;
21 static int smoothing;
22 static double hitout;
23
24 static double lenfaca, lenfacb, lenfacc, lenfacd;
25 static int tuplesize;
26
27 #define PLENFACA 0.01
28 #define PLENFACB 10000
29 #define PLENFACC 10000
30 #define PLENFACD 0.1
31 #define D6LENFACA 0.01
32 #define D6LENFACB 2500
33 #define D6LENFACC 2500
34 #define D6LENFACD 0.1
35 #define D10LENFACA 0.01
36 #define D10LENFACB 1000000
37 #define D10LENFACC 1000000
38 #define D10LENFACD 0.0
39
40 typedef struct _thread_arg
41 {
42 int njob;
43 int nadd;
44 int *nlen;
45 int *follows;
46 char **name;
47 char **seq;
48 LocalHom **localhomtable;
49 double **iscore;
50 double **nscore;
51 int *istherenewgap;
52 int **newgaplist;
53 RNApair ***singlerna;
54 double *eff_kozo_mapped;
55 int alloclen;
56 Treedep *dep;
57 int ***topol;
58 double **len;
59 Addtree *addtree;
60 int **deletelist;
61 #ifdef enablemultithread
62 int *iaddshare;
63 int thread_no;
64 pthread_mutex_t *mutex_counter;
65 #endif
66 } thread_arg_t;
67
68
69 #ifdef enablemultithread
70 typedef struct _gaplist2alnxthread_arg
71 {
72 // int thread_no;
73 int ncycle;
74 int *jobpospt;
75 int tmpseqlen;
76 int lenfull;
77 char **seq;
78 int *newgaplist;
79 int *posmap;
80 pthread_mutex_t *mutex;
81 } gaplist2alnxthread_arg_t;
82
83 typedef struct _distancematrixthread_arg
84 {
85 int thread_no;
86 int njob;
87 int norg;
88 int *jobpospt;
89 int **pointt;
90 int *nogaplen;
91 double **imtx;
92 double **nmtx;
93 double *selfscore;
94 pthread_mutex_t *mutex;
95 } distancematrixthread_arg_t;
96
97 typedef struct _jobtable2d
98 {
99 int i;
100 int j;
101 } Jobtable2d;
102
103 typedef struct _dndprethread_arg
104 {
105 int njob;
106 int thread_no;
107 double *selfscore;
108 double **mtx;
109 char **seq;
110 Jobtable2d *jobpospt;
111 pthread_mutex_t *mutex;
112 } dndprethread_arg_t;
113
114 #endif
115
116 typedef struct _blocktorealign
117 {
118 int start;
119 int end;
120 int nnewres;
121 } Blocktorealign;
122
cnctintvec(int * res,int * o1,int * o2)123 static void cnctintvec( int *res, int *o1, int *o2 )
124 {
125 while( *o1 != -1 ) *res++ = *o1++;
126 while( *o2 != -1 ) *res++ = *o2++;
127 *res = -1;
128 }
129
countnewres(int len,Blocktorealign * realign,int * posmap,int * gaplist)130 static void countnewres( int len, Blocktorealign *realign, int *posmap, int *gaplist )
131 {
132 int i, regstart, regend, len1;
133 regstart = 0;
134 len1 = len+1;
135 for( i=0; i<len1; i++ )
136 {
137
138 if( realign[i].nnewres || gaplist[i] )
139 {
140 regend = posmap[i]-1;
141 realign[i].start = regstart;
142 realign[i].end = regend;
143 }
144 if( gaplist[i] )
145 {
146 realign[i].nnewres++;
147 // fprintf( stderr, "hit? reg = %d-%d\n", regstart, regend );
148 }
149 regstart = posmap[i]+1;
150 }
151 }
fillgap(char * s,int len)152 static void fillgap( char *s, int len )
153 {
154 int orilen = strlen( s );
155 s += orilen;
156 len -= orilen;
157 while( len-- )
158 *s++ = '-';
159 *s = 0;
160 }
161
lencomp(const void * a,const void * b)162 static int lencomp( const void *a, const void *b ) // osoikamo
163 {
164 char **ast = (char **)a;
165 char **bst = (char **)b;
166 int lena = strlen( *ast );
167 int lenb = strlen( *bst );
168 // fprintf( stderr, "a=%s, b=%s\n", *ast, *bst );
169 // fprintf( stderr, "lena=%d, lenb=%d\n", lena, lenb );
170 if( lena > lenb ) return -1;
171 else if( lena < lenb ) return 1;
172 else return( 0 );
173 }
174
dorealignment_tree(Blocktorealign * block,char ** fullseq,int * fullseqlenpt,int norg,int *** topol,int * follows)175 static int dorealignment_tree( Blocktorealign *block, char **fullseq, int *fullseqlenpt, int norg, int ***topol, int *follows )
176 {
177 int i, j, k, posinold, newlen, *nmem;
178 int n0, n1, localloclen, nhit, hit1, hit2;
179 int *pickhistory;
180 int nprof1, nprof2, pos, zure;
181 char **prof1, **prof2;
182 int *iinf0, *iinf1;
183 int *group, *nearest, *g2n, ngroup;
184 char ***mem;
185 static char **tmpaln0 = NULL;
186 static char **tmpaln1 = NULL;
187 static char **tmpseq;
188 int ***topolpick;
189 int *tmpint;
190 int *intptr, *intptrx;
191 char *tmpseq0, *cptr, **cptrptr;
192
193
194 localloclen = 4 * ( block->end - block->start + 1 ); // ookisugi?
195 tmpaln0 = AllocateCharMtx( njob, localloclen );
196 tmpaln1 = AllocateCharMtx( njob, localloclen );
197 tmpseq = AllocateCharMtx( 1, *fullseqlenpt * 4 );
198 iinf0 = AllocateIntVec( njob );
199 iinf1 = AllocateIntVec( njob );
200 nearest = AllocateIntVec( njob ); // oosugi
201
202 posinold = block->start;
203
204 n0 = 0;
205 n1 = 0;
206 for( i=0; i<njob; i++ )
207 {
208 strncpy( tmpseq[0], fullseq[i] + block->start, block->end - block->start + 1 );
209 tmpseq[0][block->end - block->start + 1] = 0;
210 commongappick( 1, tmpseq );
211 if( tmpseq[0][0] != 0 )
212 {
213 if( i < norg )
214 {
215 fprintf( stderr, "BUG!!!!\n" );
216 exit( 1 );
217 }
218 strcpy( tmpaln0[n0], tmpseq[0] );
219 iinf0[n0] = i;
220 nearest[n0] = follows[i-norg];
221 n0++;
222 }
223 else
224 {
225 strcpy( tmpaln1[n0], "" );
226 iinf1[n1] = i;
227 n1++;
228 }
229 }
230 mem = AllocateCharCub( n0, n0+1, 0 ); // oosugi
231 nmem = AllocateIntVec( n0 ); // oosugi
232 g2n = AllocateIntVec( n0 ); // oosugi
233 group = AllocateIntVec( n0 ); // oosugi
234 for( i=0; i<n0; i++ ) mem[i][0] = NULL;
235 for( i=0; i<n0; i++ ) nmem[i] = 0;
236 ngroup = 0;
237 for( i=0; i<n0; i++ )
238 {
239 for( j=0; j<i; j++ ) if( nearest[j] == nearest[i] ) break;
240 if( j == i ) group[i] = ngroup++;
241 else group[i] = group[j];
242
243 for( j=0; mem[group[i]][j]; j++ )
244 ;
245 mem[group[i]][j] = tmpaln0[i];
246 mem[group[i]][j+1] = NULL;
247 nmem[group[i]]++;
248 g2n[group[i]] = nearest[i];
249 // fprintf( stderr, "%d -> %d -> group%d\n", i, nearest[i], group[i] );
250 // fprintf( stderr, "mem[%d][%d] = %s\n", group[i], j, mem[group[i]][j] );
251 }
252
253 for( i=0; i<ngroup; i++ )
254 {
255 // fprintf( stderr, "before sort:\n" );
256 // for( j=0; j<nmem[i]; j++ ) fprintf( stderr, "%s\n", mem[i][j] );
257 // fprintf( stderr, "\n" );
258 qsort( mem[i], nmem[i], sizeof( char * ), lencomp );
259 // fprintf( stderr, "after sort:\n" );
260 // for( j=0; j<nmem[i]; j++ ) fprintf( stderr, "%s\n", mem[i][j] );
261 // fprintf( stderr, "\n" );
262 }
263
264 #if 0
265 for( i=1; i<n0; i++ )
266 {
267 profilealignment( 1, n1, i, tmpaln0+i, tmpaln1, tmpaln0, localloclen, alg );
268 }
269 newlen = strlen( tmpaln0[0] );
270 for( i=0; i<n1; i++ ) eq2dash( tmpaln1[i] );
271 #else
272 // newlen = 0;
273 for( i=0; i<ngroup; i++ )
274 {
275 // for( k=0; mem[i][k]; k++ ) fprintf( stderr, "mem[%d][%d] = %s\n", i, j, mem[i][k] );
276
277 for( j=1; j<nmem[i]; j++ )
278 {
279 profilealignment2( 1, j, mem[i]+j, mem[i], localloclen, alg );
280 }
281 // for( j=0; j<nmem[i]; j++ ) fprintf( stderr, "j=%d, %s\n", j, mem[i][j] );
282
283 #if 0 // iru
284 if( ( j = strlen( mem[i][0] ) ) > newlen ) newlen = j;
285 for( j=0; j<=i; j++ )
286 {
287 for( k=0; mem[j][k]; k++ )
288 fillgap( mem[j][k], newlen );
289 }
290 #endif
291
292 }
293 #if 0
294 fprintf( stderr, "After ingroupalignment (original order):\n" );
295 for( i=0; i<n0; i++ ) fprintf( stderr, "%s\n", tmpaln0[i] );
296 #endif
297 #endif
298
299 topolpick = AllocateIntCub( ngroup, 2, ngroup );
300 pickhistory = AllocateIntVec( ngroup );
301 tmpint = AllocateIntVec( 2 );
302 prof1 = AllocateCharMtx( n0, 0 );
303 prof2 = AllocateCharMtx( n0, 0 );
304 for( i=0; i<ngroup; i++ )
305 {
306 topolpick[i][0][0] = -1;
307 topolpick[i][1][0] = -1;
308 pickhistory[i] = -1;
309 }
310
311 nhit = 0;
312 for( i=0; i<norg-1; i++ )
313 {
314 for( intptr=topol[i][0]; *intptr>-1; intptr++ )
315 {
316 for( intptrx=g2n,k=0; k<ngroup; intptrx++,k++ )
317 {
318 if( *intptr == *intptrx )
319 {
320 hit1 = k;
321 goto exitloop1;
322 }
323 }
324 }
325 continue;
326 exitloop1:
327 // fprintf( stderr, "hit1! group%d -> %d\n", k, topol[i][0][j] );
328
329 for( intptr=topol[i][1]; *intptr>-1; intptr++ )
330 {
331 for( intptrx=g2n,k=0; k<ngroup; intptrx++,k++ )
332 {
333 if( *intptr == *intptrx )
334 {
335 hit2 = k;
336 goto exitloop2;
337 }
338 }
339 }
340 continue;
341 exitloop2:
342
343 if( pickhistory[hit1] == -1 )
344 {
345 topolpick[nhit][0][0] = hit1;
346 topolpick[nhit][0][1] = -1;
347 }
348 else
349 {
350 intcpy( topolpick[nhit][0], topolpick[pickhistory[hit1]][0] );
351 intcat( topolpick[nhit][0], topolpick[pickhistory[hit1]][1] );
352 }
353 if( pickhistory[hit2] == -1 )
354 {
355 topolpick[nhit][1][0] = hit2;
356 topolpick[nhit][1][1] = -1;
357 }
358 else
359 {
360 intcpy( topolpick[nhit][1], topolpick[pickhistory[hit2]][0] );
361 intcat( topolpick[nhit][1], topolpick[pickhistory[hit2]][1] );
362 }
363
364 pickhistory[hit1] = nhit;
365 pickhistory[hit2] = nhit;
366 nhit++;
367 // g2n[hit1] = -1;
368 // g2n[hit2] = -1;
369
370 // fprintf( stderr, "hit2! group%d -> %d\n", k, topol[i][1][j] );
371
372 #if 0
373 fprintf( stderr, "\nHIT!!! \n" );
374 fprintf( stderr, "\nSTEP %d\n", i );
375 for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][0][j] );
376 fprintf( stderr, "\n" );
377 for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][1][j] );
378 fprintf( stderr, "\n" );
379 #endif
380 }
381
382 for( i=0; i<ngroup-1; i++ )
383 {
384 #if 0
385 fprintf( stderr, "\npickSTEP %d\n", i );
386 for( j=0; topolpick[i][0][j]>-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][0][j] );
387 fprintf( stderr, "\n" );
388 for( j=0; topolpick[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][1][j] );
389 fprintf( stderr, "\n" );
390 #endif
391
392 pos = 0;
393 // for( j=0; topolpick[i][0][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][0][j]][k]); k++ ) prof1[pos++] = cptr;
394 for( intptr=topolpick[i][0]; *intptr>-1; intptr++ )
395 for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ )
396 prof1[pos++] = cptr;
397 nprof1 = pos;
398 pos = 0;
399 // for( j=0; topolpick[i][1][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][1][j]][k]); k++ ) prof2[pos++] = cptr;
400 for( intptr=topolpick[i][1]; *intptr>-1; intptr++ )
401 for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ )
402 prof2[pos++] = cptr;
403 nprof2 = pos;
404
405
406 profilealignment2( nprof1, nprof2, prof1, prof2, localloclen, alg );
407 #if 0
408 for( j=0; j<nprof1; j++ ) fprintf( stderr, "prof1[%d] = %s\n", j, prof1[j] );
409 for( j=0; j<nprof2; j++ ) fprintf( stderr, "prof2[%d] = %s\n", j, prof2[j] );
410 fprintf( stderr, "done.\n" );
411 #endif
412 }
413 newlen = strlen( tmpaln0[0] );
414 for( j=0; j<n1; j++ ) fillgap( tmpaln1[j], newlen );
415
416 #if 0
417 fprintf( stderr, "After rerealignment (original order):\n" );
418 for( i=0; i<n0; i++ ) fprintf( stderr, "%s\n", tmpaln0[i] );
419 #endif
420
421 // newlen = strlen( tmpaln0[0] );
422 zure = ( block->end - block->start + 1 - newlen );
423 // fprintf( stderr, "zure = %d, localloclen=%d, newlen=%d\n", zure, localloclen, newlen );
424
425
426 if( *fullseqlenpt < strlen( fullseq[0] ) - (block->end-block->start+1) + newlen + 1 )
427 {
428 *fullseqlenpt = strlen( fullseq[0] ) * 2;
429 fprintf( stderr, "reallocating..." );
430 for( i=0; i<njob; i++ )
431 {
432 fullseq[i] = realloc( fullseq[i], *fullseqlenpt * sizeof( char ) );
433 if( !fullseq[i] )
434 {
435 fprintf( stderr, "Cannot reallocate seq[][]\n" );
436 exit( 1 );
437 }
438 }
439 fprintf( stderr, "done.\n" );
440 }
441
442
443 tmpseq0 = tmpseq[0];
444 posinold = block->end+1;
445 for( i=0; i<n0; i++ )
446 {
447 strncpy( tmpseq0, tmpaln0[i], newlen );
448 strcpy( tmpseq0+newlen, fullseq[iinf0[i]] + posinold );
449 strcpy( fullseq[iinf0[i]]+block->start, tmpseq0 );
450 }
451 for( i=0; i<n1; i++ )
452 {
453 // eq2dash( tmpaln1[i] );
454 strncpy( tmpseq0, tmpaln1[i], newlen );
455 strcpy( tmpseq0+newlen, fullseq[iinf1[i]] + posinold );
456 strcpy( fullseq[iinf1[i]]+block->start, tmpseq0 );
457 }
458 FreeCharMtx( tmpaln0 );
459 FreeCharMtx( tmpaln1 );
460 FreeCharMtx( tmpseq );
461 for( i=0; i<n0; i++ )
462 {
463 // for( j=0; j<njob; j++ ) free( mem[i][j] );
464 free( mem[i] );
465 }
466 free( mem );
467 free( nmem );
468 free( iinf0 );
469 free( iinf1 );
470 free( group );
471 free( g2n );
472 free( prof1 );
473 free( prof2 );
474 free( nearest );
475 FreeIntCub( topolpick );
476 free( pickhistory );
477 free( tmpint );
478
479 return( zure );
480 }
481
482
483 #if 0
484 static int dorealignment( Blocktorealign *block, char **fullseq, int alloclen, int fullseqlen, int norg )
485 {
486 int i, posinnew, posinold, newlen;
487 int n0, n1;
488 int zure;
489 static int *iinf0, *iinf1;
490 static char **tmpaln0 = NULL;
491 static char **tmpaln1 = NULL;
492 static char **tmpseq;
493 char *opt, *npt;
494
495 if( tmpaln0 == NULL )
496 {
497 tmpaln0 = AllocateCharMtx( njob, alloclen );
498 tmpaln1 = AllocateCharMtx( njob, alloclen );
499 tmpseq = AllocateCharMtx( 1, fullseqlen );
500 iinf0 = AllocateIntVec( njob );
501 iinf1 = AllocateIntVec( njob );
502 }
503 posinold = block->start;
504
505
506 n0 = 0;
507 n1 = 0;
508 for( i=0; i<njob; i++ )
509 {
510 strncpy( tmpseq[0], fullseq[i] + block->start, block->end - block->start + 1 );
511 tmpseq[0][block->end - block->start + 1] = 0;
512 commongappick( 1, tmpseq );
513 // if( strlen( tmpseq[0] ) > 0 )
514 if( tmpseq[0][0] != 0 )
515 {
516 if( i < norg )
517 {
518 fprintf( stderr, "BUG!!!!\n" );
519 exit( 1 );
520 }
521 strcpy( tmpaln0[n0], tmpseq[0] );
522 iinf0[n0] = i;
523 n0++;
524 }
525 else
526 {
527 strcpy( tmpaln1[n0], "" );
528 iinf1[n1] = i;
529 n1++;
530 }
531 }
532
533
534 for( i=1; i<n0; i++ )
535 {
536 profilealignment( 1, n1, i, tmpaln0+i, tmpaln1, tmpaln0, alloclen, alg ); // n1 ha allgap
537 }
538
539 #if 1
540 fprintf( stderr, "After realignment:\n" );
541 for( i=0; i<n0; i++ ) fprintf( stderr, "%s\n", tmpaln0[i] );
542 // for( i=0; i<n1; i++ ) fprintf( stderr, "%s\n", tmpaln1[i] );
543 #endif
544
545 newlen = strlen( tmpaln0[0] );
546 for( i=0; i<n0; i++ ) strncpy( fullseq[iinf0[i]]+block->start, tmpaln0[i], newlen );
547 for( i=0; i<n1; i++ )
548 {
549 eq2dash( tmpaln1[i] );
550 strncpy( fullseq[iinf1[i]] + block->start, tmpaln1[i], newlen );
551 }
552
553 posinold = block->end+1;
554 posinnew = block->start + newlen;
555
556
557 zure = ( block->end - block->start + 1 - strlen( tmpaln0[0] ) );
558
559 for( i=0; i<njob; i++ )
560 {
561 #if 0
562 strcpy( fullseq[i]+posinnew, fullseq[i]+posinold ); // ??
563 #else
564 opt = fullseq[i] + posinold;
565 npt = fullseq[i] + posinnew;
566 while( ( *npt++ = *opt++ ) );
567 *npt = 0;
568 #endif
569 }
570
571 return( zure );
572 }
573 #endif
574
adjustposmap(int len,int * posmap,int * gaplist)575 static void adjustposmap( int len, int *posmap, int *gaplist )
576 {
577 int *newposmap;
578 int *mpt1, *mpt2;
579 int lenbk, zure;
580 newposmap = calloc( len+2, sizeof( int ) );
581 lenbk = len;
582 zure = 0;
583 mpt1 = newposmap;
584 mpt2 = posmap;
585
586 #if 0
587 int i;
588 fprintf( stderr, "posmapa = " );
589 for( i=0; i<len+2; i++ )
590 {
591 fprintf( stderr, "%3d ", posmap[i] );
592 }
593 fprintf( stderr, "\n" );
594 #endif
595
596 while( len-- )
597 {
598 zure += *gaplist++;
599 *mpt1++ = *mpt2++ + zure;
600 }
601 zure += *gaplist++;
602 *mpt1 = *mpt2 + zure;
603
604 mpt1 = newposmap;
605 mpt2 = posmap;
606 len = lenbk;
607 while( len-- ) *mpt2++ = *mpt1++;
608 *mpt2 = *mpt1;
609 free( newposmap );
610 #if 0
611 fprintf( stderr, "posmapm = " );
612 for( i=0; i<lenbk+2; i++ )
613 {
614 fprintf( stderr, "%3d ", posmap[i] );
615 }
616 fprintf( stderr, "\n" );
617 #endif
618 }
619
insertgapsbyotherfragments_compact(int len,char * a,char * s,int * l,int * p)620 static int insertgapsbyotherfragments_compact( int len, char *a, char *s, int *l, int *p )
621 {
622 int gaplen;
623 int i, pos, pi;
624 int prevp = -1;
625 int realignment = 0;
626 // fprintf( stderr, "### insertgapsbyotherfragments\n" );
627 for( i=0; i<len; i++ )
628 {
629 gaplen = l[i];
630 pi = p[i];
631 pos = prevp + 1;
632 // fprintf( stderr, "gaplen = %d\n", gaplen );
633 while( gaplen-- )
634 {
635 pos++;
636 *a++ = *s++;
637 }
638 // fprintf( stderr, "pos = %d, pi = %d\n", pos, pi );
639 while( pos++ < pi )
640 {
641 *a++ = '=';
642 realignment = 1;
643 }
644 *a++ = *s++;
645 prevp = pi;
646 }
647 gaplen = l[i];
648 pi = p[i];
649 pos = prevp + 1;
650 while( gaplen-- )
651 {
652 pos++;
653 *a++ = *s++;
654 }
655 while( pos++ < pi )
656 {
657 *a++ = '=';
658 realignment = 1;
659 }
660 *a = 0;
661 return( realignment );
662 }
663
makegaplistcompact(int len,int * p,int * c,int * l)664 void makegaplistcompact( int len, int *p, int *c, int *l )
665 {
666 int i;
667 int pg;
668 int prep = -1;
669 for( i=0; i<len+2; i++ )
670 {
671 if( ( pg = p[i]-prep-1) > 0 && l[i] > 0 )
672 {
673 if( pg < l[i] )
674 {
675 c[i] = l[i] - pg;
676 }
677 else
678 {
679 c[i] = 0;
680 }
681 }
682 else
683 {
684 c[i] = l[i];
685 }
686 prep = p[i];
687 }
688 }
689
690
gaplist2alnx(int len,char * a,char * s,int * l,int * p,int lenlimit)691 void gaplist2alnx( int len, char *a, char *s, int *l, int *p, int lenlimit )
692 {
693 int gaplen;
694 int pos, pi, posl;
695 int prevp = -1;
696 int reslen = 0;
697 char *sp;
698 // char *abk = a;
699
700 #if 0
701 int i;
702 char *abk = a;
703 fprintf( stderr, "s = %s\n", s );
704 fprintf( stderr, "posmap = " );
705 for( i=0; i<len+2; i++ )
706 {
707 fprintf( stderr, "%3d ", p[i] );
708 }
709 fprintf( stderr, "\n" );
710 fprintf( stderr, "gaplist = " );
711 for( i=0; i<len+2; i++ )
712 {
713 fprintf( stderr, "%3d ", l[i] );
714 }
715 fprintf( stderr, "\n" );
716 #endif
717 while( len-- )
718 {
719 gaplen = *l++;
720 pi = *p++;
721
722 if( (reslen+=gaplen) > lenlimit )
723 {
724 fprintf( stderr, "Length over. Please recompile!\n" );
725 exit( 1 );
726 }
727 while( gaplen-- ) *a++ = '-';
728
729 pos = prevp + 1;
730 sp = s + pos;
731 if( ( posl = pi - pos ) )
732 {
733 if( ( reslen += posl ) > lenlimit )
734 {
735 fprintf( stderr, "Length over. Please recompile\n" );
736 exit( 1 );
737 }
738 while( posl-- ) *a++ = *sp++;
739 }
740
741 if( reslen++ > lenlimit )
742 {
743 fprintf( stderr, "Length over. Please recompile\n" );
744 exit( 1 );
745 }
746 *a++ = *sp;
747 prevp = pi;
748 }
749
750 gaplen = *l;
751 pi = *p;
752 if( (reslen+=gaplen) > lenlimit )
753 {
754 fprintf( stderr, "Length over. Please recompile\n" );
755 exit( 1 );
756 }
757 while( gaplen-- ) *a++ = '-';
758
759 pos = prevp + 1;
760 sp = s + pos;
761 if( ( posl = pi - pos ) )
762 {
763 if( ( reslen += posl ) > lenlimit )
764 {
765 fprintf( stderr, "Length over. Please recompile\n" );
766 exit( 1 );
767 }
768 while( posl-- ) *a++ = *sp++;
769 }
770 *a = 0;
771 // fprintf( stderr, "reslen = %d, strlen(a) = %d\n", reslen, strlen( abk ) );
772 // fprintf( stderr, "a = %s\n", abk );
773 }
774
makenewgaplist(int * l,char * a)775 static void makenewgaplist( int *l, char *a )
776 {
777 while( 1 )
778 {
779 while( *a == '=' )
780 {
781 a++;
782 (*l)++;
783 // fprintf( stderr, "a[] (i) = %s, *l=%d\n", a, *(l) );
784 }
785 *++l = 0;
786 if( *a == 0 ) break;
787 a++;
788 }
789 *l = -1;
790 }
791
792
arguments(int argc,char * argv[])793 void arguments( int argc, char *argv[] )
794 {
795 int c;
796
797 nthread = 1;
798 outnumber = 0;
799 scoreout = 0;
800 treein = 0;
801 topin = 0;
802 rnaprediction = 'm';
803 rnakozo = 0;
804 nevermemsave = 0;
805 inputfile = NULL;
806 addfile = NULL;
807 addprofile = 1;
808 fftkeika = 0;
809 constraint = 0;
810 nblosum = 62;
811 fmodel = 0;
812 calledByXced = 0;
813 devide = 0;
814 use_fft = 0; // chuui
815 force_fft = 0;
816 fftscore = 1;
817 fftRepeatStop = 0;
818 fftNoAnchStop = 0;
819 weight = 3;
820 utree = 1;
821 tbutree = 1;
822 refine = 0;
823 check = 1;
824 cut = 0.0;
825 disp = 0;
826 outgap = 1;
827 alg = 'A';
828 mix = 0;
829 tbitr = 0;
830 scmtd = 5;
831 tbweight = 0;
832 tbrweight = 3;
833 checkC = 0;
834 treemethod = 'X';
835 sueff_global = 0.1;
836 contin = 0;
837 scoremtx = 1;
838 kobetsubunkatsu = 0;
839 dorp = NOTSPECIFIED;
840 ppenalty = NOTSPECIFIED;
841 penalty_shift_factor = 1000.0;
842 ppenalty_ex = NOTSPECIFIED;
843 poffset = NOTSPECIFIED;
844 kimuraR = NOTSPECIFIED;
845 pamN = NOTSPECIFIED;
846 geta2 = GETA2;
847 fftWinSize = NOTSPECIFIED;
848 fftThreshold = NOTSPECIFIED;
849 RNAppenalty = NOTSPECIFIED;
850 RNAppenalty_ex = NOTSPECIFIED;
851 RNApthr = NOTSPECIFIED;
852 TMorJTT = JTT;
853 consweight_multi = 1.0;
854 consweight_rna = 0.0;
855 nadd = 0;
856 multidist = 0;
857 tuplesize = -1;
858 legacygapcost = 0;
859 allowlongadds = 0;
860 keeplength = 0;
861 mapout = 0;
862 smoothing = 0;
863 distout = 0;
864 hitout = 0.0;
865
866 while( --argc > 0 && (*++argv)[0] == '-' )
867 {
868 while ( ( c = *++argv[0] ) )
869 {
870 switch( c )
871 {
872 case 'i':
873 inputfile = *++argv;
874 fprintf( stderr, "inputfile = %s\n", inputfile );
875 --argc;
876 goto nextoption;
877 case 'I':
878 nadd = myatoi( *++argv );
879 fprintf( stderr, "nadd = %d\n", nadd );
880 --argc;
881 goto nextoption;
882 case 'e':
883 RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 );
884 --argc;
885 goto nextoption;
886 case 'o':
887 RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
888 --argc;
889 goto nextoption;
890 case 'f':
891 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
892 // fprintf( stderr, "ppenalty = %d\n", ppenalty );
893 --argc;
894 goto nextoption;
895 case 'Q':
896 penalty_shift_factor = atof( *++argv );
897 --argc;
898 goto nextoption;
899 case 'g':
900 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
901 fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
902 --argc;
903 goto nextoption;
904 case 'h':
905 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
906 // fprintf( stderr, "poffset = %d\n", poffset );
907 --argc;
908 goto nextoption;
909 case 'k':
910 kimuraR = myatoi( *++argv );
911 fprintf( stderr, "kappa = %d\n", kimuraR );
912 --argc;
913 goto nextoption;
914 case 'b':
915 nblosum = myatoi( *++argv );
916 scoremtx = 1;
917 fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
918 --argc;
919 goto nextoption;
920 case 'j':
921 pamN = myatoi( *++argv );
922 scoremtx = 0;
923 TMorJTT = JTT;
924 fprintf( stderr, "jtt/kimura %d\n", pamN );
925 --argc;
926 goto nextoption;
927 case 'm':
928 pamN = myatoi( *++argv );
929 scoremtx = 0;
930 TMorJTT = TM;
931 fprintf( stderr, "tm %d\n", pamN );
932 --argc;
933 goto nextoption;
934 case 'l':
935 fastathreshold = atof( *++argv );
936 constraint = 2;
937 --argc;
938 goto nextoption;
939 case 'r':
940 consweight_rna = atof( *++argv );
941 rnakozo = 1;
942 --argc;
943 goto nextoption;
944 case 'c':
945 consweight_multi = atof( *++argv );
946 --argc;
947 goto nextoption;
948 case 'C':
949 nthread = myatoi( *++argv );
950 fprintf( stderr, "nthread = %d\n", nthread );
951 --argc;
952 goto nextoption;
953 #if 0
954 case 'R':
955 rnaprediction = 'r';
956 break;
957 case 's':
958 RNAscoremtx = 'r';
959 break;
960 #endif
961 #if 1
962 case 'a':
963 fmodel = 1;
964 break;
965 #endif
966 case 'K':
967 addprofile = 0;
968 break;
969 case 'y':
970 distout = 1;
971 break;
972 case '^':
973 hitout = atof( *++argv );
974 --argc;
975 goto nextoption;
976 case 't':
977 treeout = 1;
978 break;
979 case 'T':
980 noalign = 1;
981 break;
982 case 'D':
983 dorp = 'd';
984 break;
985 case 'P':
986 dorp = 'p';
987 break;
988 #if 1
989 case 'O':
990 outgap = 0;
991 break;
992 #else
993 case 'O':
994 fftNoAnchStop = 1;
995 break;
996 #endif
997 case 'S':
998 scoreout = 1;
999 break;
1000 #if 0
1001 case 'e':
1002 fftscore = 0;
1003 break;
1004 case 'r':
1005 fmodel = -1;
1006 break;
1007 case 'R':
1008 fftRepeatStop = 1;
1009 break;
1010 case 's':
1011 treemethod = 's';
1012 break;
1013 #endif
1014 case 'X':
1015 treemethod = 'X';
1016 sueff_global = atof( *++argv );
1017 fprintf( stderr, "sueff_global = %f\n", sueff_global );
1018 --argc;
1019 goto nextoption;
1020 case 'E':
1021 treemethod = 'E';
1022 break;
1023 case 'q':
1024 treemethod = 'q';
1025 break;
1026 case 'n' :
1027 outnumber = 1;
1028 break;
1029 #if 0
1030 case 'a':
1031 alg = 'a';
1032 break;
1033 case 'Q':
1034 alg = 'Q';
1035 break;
1036 #endif
1037 case 'H':
1038 alg = 'H';
1039 break;
1040 case 'A':
1041 alg = 'A';
1042 break;
1043 case 'M':
1044 alg = 'M';
1045 break;
1046 case 'N':
1047 nevermemsave = 1;
1048 break;
1049 case 'B': // hitsuyou! memopt -M -B no tame
1050 break;
1051 case 'F':
1052 use_fft = 1;
1053 break;
1054 case 'G':
1055 force_fft = 1;
1056 use_fft = 1;
1057 break;
1058 case 'U':
1059 treein = 1;
1060 break;
1061 case 'V':
1062 allowlongadds = 1;
1063 break;
1064 case 'p':
1065 smoothing = 1;
1066 break;
1067 #if 0
1068 case 'V':
1069 topin = 1;
1070 break;
1071 #endif
1072 case 'u':
1073 tbrweight = 0;
1074 weight = 0;
1075 break;
1076 case 'v':
1077 tbrweight = 3;
1078 break;
1079 case 'd':
1080 multidist = 1;
1081 break;
1082 case 'W':
1083 tuplesize = myatoi( *++argv );
1084 --argc;
1085 goto nextoption;
1086 #if 0
1087 case 'd':
1088 disp = 1;
1089 break;
1090 #endif
1091 /* Modified 01/08/27, default: user tree */
1092 case 'J':
1093 tbutree = 0;
1094 break;
1095 /* modification end. */
1096 case 'z':
1097 fftThreshold = myatoi( *++argv );
1098 --argc;
1099 goto nextoption;
1100 case 'w':
1101 fftWinSize = myatoi( *++argv );
1102 --argc;
1103 goto nextoption;
1104 #if 0
1105 case 'Z':
1106 checkC = 1;
1107 break;
1108 #endif
1109 case 'L':
1110 legacygapcost = 1;
1111 break;
1112 case 'Y':
1113 keeplength = 1;
1114 break;
1115 case 'Z':
1116 mapout = 1;
1117 break;
1118 default:
1119 fprintf( stderr, "illegal option %c\n", c );
1120 argc = 0;
1121 break;
1122 }
1123 }
1124 nextoption:
1125 ;
1126 }
1127 if( argc == 1 )
1128 {
1129 cut = atof( (*argv) );
1130 argc--;
1131 }
1132 if( argc != 0 )
1133 {
1134 fprintf( stderr, "options: Check source file !\n" );
1135 exit( 1 );
1136 }
1137 if( tbitr == 1 && outgap == 0 )
1138 {
1139 fprintf( stderr, "conflicting options : o, m or u\n" );
1140 exit( 1 );
1141 }
1142 if( alg == 'C' && outgap == 0 )
1143 {
1144 fprintf( stderr, "conflicting options : C, o\n" );
1145 exit( 1 );
1146 }
1147 }
1148
1149
treebase(int nseq,int * nlen,char ** aseq,int nadd,char * mergeoralign,char ** mseq1,char ** mseq2,int *** topol,double * effarr,int * alloclen,LocalHom ** localhomtable,RNApair *** singlerna,double * effarr_kozo)1150 static double treebase( int nseq, int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo )
1151 {
1152
1153 int i, l, m;
1154 int len1nocommongap, len2nocommongap;
1155 int len1, len2;
1156 int clus1, clus2;
1157 double pscore, tscore;
1158 char *indication1, *indication2;
1159 double *effarr1 = NULL;
1160 double *effarr2 = NULL;
1161 double *effarr1_kozo = NULL;
1162 double *effarr2_kozo = NULL;
1163 LocalHom ***localhomshrink = NULL;
1164 int *fftlog;
1165 int m1, m2;
1166 int *gaplen;
1167 int *gapmap;
1168 int *alreadyaligned;
1169 // double dumfl = 0.0;
1170 double dumdb = 0.0;
1171 int ffttry;
1172 RNApair ***grouprna1, ***grouprna2;
1173
1174 if( rnakozo && rnaprediction == 'm' )
1175 {
1176 grouprna1 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) );
1177 grouprna2 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) );
1178 }
1179 else
1180 {
1181 grouprna1 = grouprna2 = NULL;
1182 }
1183
1184 fftlog = AllocateIntVec( nseq );
1185 effarr1 = AllocateDoubleVec( nseq );
1186 effarr2 = AllocateDoubleVec( nseq );
1187 indication1 = AllocateCharVec( 150 );
1188 indication2 = AllocateCharVec( 150 );
1189 alreadyaligned = AllocateIntVec( nseq );
1190 if( constraint )
1191 {
1192 localhomshrink = (LocalHom ***)calloc( nseq, sizeof( LocalHom ** ) );
1193 #if SMALLMEMORY
1194 if( multidist )
1195 {
1196 for( i=0; i<nseq; i++) localhomshrink[i] = (LocalHom **)calloc( 1, sizeof( LocalHom *) );
1197 }
1198 else
1199 #endif
1200 {
1201 for( i=0; i<nseq; i++) localhomshrink[i] = (LocalHom **)calloc( nseq, sizeof( LocalHom *) );
1202 }
1203 }
1204 effarr1_kozo = AllocateDoubleVec( nseq ); //tsuneni allocate sareru.
1205 effarr2_kozo = AllocateDoubleVec( nseq ); //tsuneni allocate sareru.
1206 for( i=0; i<nseq; i++ ) effarr1_kozo[i] = 0.0;
1207 for( i=0; i<nseq; i++ ) effarr2_kozo[i] = 0.0;
1208
1209 gaplen = AllocateIntVec( *alloclen+10 ); // maikai shokika
1210 gapmap = AllocateIntVec( *alloclen+10 ); // maikai shokika
1211 for( i=0; i<nseq-1; i++ ) alreadyaligned[i] = 1;
1212 alreadyaligned[nseq-1] = 0;
1213
1214 for( l=0; l<nseq; l++ ) fftlog[l] = 1;
1215
1216
1217 if( constraint )
1218 {
1219 #if SMALLMEMORY
1220 if( multidist )
1221 dontcalcimportance_firstone( nseq, effarr, aseq, localhomtable );
1222 else
1223 calcimportance( nseq, effarr, aseq, localhomtable );
1224 #else
1225 calcimportance( nseq, effarr, aseq, localhomtable );
1226 #endif
1227 }
1228
1229 tscore = 0.0;
1230 for( l=0; l<nseq-1; l++ )
1231 {
1232 if( mergeoralign[l] == 'n' )
1233 {
1234 // fprintf( stderr, "SKIP!\n" );
1235 #if 0
1236 free( topol[l][0] );
1237 free( topol[l][1] );
1238 free( topol[l] );
1239 #endif
1240 continue;
1241 }
1242
1243 m1 = topol[l][0][0];
1244 m2 = topol[l][1][0];
1245 len1 = strlen( aseq[m1] );
1246 len2 = strlen( aseq[m2] );
1247 if( *alloclen < len1 + len2 )
1248 {
1249 #if 0
1250 fprintf( stderr, "\nReallocating.." );
1251 *alloclen = ( len1 + len2 ) + 1000;
1252 ReallocateCharMtx( aseq, nseq, *alloclen + 10 );
1253 gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) );
1254 if( gaplen == NULL )
1255 {
1256 fprintf( stderr, "Cannot realloc gaplen\n" );
1257 exit( 1 );
1258 }
1259 gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) );
1260 if( gapmap == NULL )
1261 {
1262 fprintf( stderr, "Cannot realloc gapmap\n" );
1263 exit( 1 );
1264 }
1265 fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
1266 #else
1267 fprintf( stderr, "Length over!\n" );
1268 exit( 1 );
1269 #endif
1270 }
1271
1272 if( effarr_kozo )
1273 {
1274 clus1 = fastconjuction_noname_kozo( topol[l][0], aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
1275 clus2 = fastconjuction_noname_kozo( topol[l][1], aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
1276 }
1277 else
1278 {
1279 clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, 0.0 );
1280 clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, 0.0 );
1281 }
1282
1283 if( mergeoralign[l] == '1' || mergeoralign[l] == '2' )
1284 {
1285 newgapstr = "=";
1286 }
1287 else
1288 newgapstr = "-";
1289
1290
1291 len1nocommongap = len1;
1292 len2nocommongap = len2;
1293 if( mergeoralign[l] == '1' ) // nai
1294 {
1295 findcommongaps( clus2, mseq2, gapmap );
1296 commongappick( clus2, mseq2 );
1297 len2nocommongap = strlen( mseq2[0] );
1298 }
1299 else if( mergeoralign[l] == '2' )
1300 {
1301 findcommongaps( clus1, mseq1, gapmap );
1302 commongappick( clus1, mseq1 );
1303 len1nocommongap = strlen( mseq1[0] );
1304 }
1305
1306
1307 // fprintf( trap_g, "\nSTEP-%d\n", l );
1308 // fprintf( trap_g, "group1 = %s\n", indication1 );
1309 // fprintf( trap_g, "group2 = %s\n", indication2 );
1310 //
1311 #if 1
1312 // fprintf( stderr, "\rSTEP % 5d /%d ", l+1, nseq-1 );
1313 // fflush( stderr );
1314 #else
1315 fprintf( stdout, "STEP %d /%d\n", l+1, nseq-1 );
1316 fprintf( stderr, "STEP %d /%d\n", l+1, nseq-1 );
1317 fprintf( stderr, "group1 = %.66s", indication1 );
1318 if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
1319 fprintf( stderr, "\n" );
1320 fprintf( stderr, "group2 = %.66s", indication2 );
1321 if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
1322 fprintf( stderr, "\n" );
1323 #endif
1324
1325
1326
1327 // for( i=0; i<clus1; i++ ) fprintf( stderr, "## STEP%d-eff for mseq1-%d %f\n", l+1, i, effarr1[i] );
1328
1329 if( constraint )
1330 {
1331 #if SMALLMEMORY
1332 if( multidist )
1333 {
1334 fastshrinklocalhom_one( topol[l][0], topol[l][1], nseq-1, localhomtable, localhomshrink );
1335 }
1336 else
1337 #endif
1338 {
1339 fastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
1340 }
1341
1342 // msfastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
1343 // fprintf( stdout, "localhomshrink =\n" );
1344 // outlocalhompt( localhomshrink, clus1, clus2 );
1345 // weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink );
1346 // fprintf( stderr, "after weight =\n" );
1347 // outlocalhompt( localhomshrink, clus1, clus2 );
1348 }
1349 if( rnakozo && rnaprediction == 'm' )
1350 {
1351 makegrouprna( grouprna1, singlerna, topol[l][0] );
1352 makegrouprna( grouprna2, singlerna, topol[l][1] );
1353 }
1354
1355
1356 /*
1357 fprintf( stderr, "before align all\n" );
1358 display( aseq, nseq );
1359 fprintf( stderr, "\n" );
1360 fprintf( stderr, "before align 1 %s \n", indication1 );
1361 display( mseq1, clus1 );
1362 fprintf( stderr, "\n" );
1363 fprintf( stderr, "before align 2 %s \n", indication2 );
1364 display( mseq2, clus2 );
1365 fprintf( stderr, "\n" );
1366 */
1367
1368
1369 if( !nevermemsave && ( constraint != 2 && alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) )
1370 {
1371 fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 );
1372 alg = 'M';
1373 if( commonIP ) FreeIntMtx( commonIP );
1374 commonIP = NULL; // 2013/Jul17
1375 commonAlloc1 = 0;
1376 commonAlloc2 = 0;
1377 }
1378
1379
1380 // if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
1381 if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 );
1382 else ffttry = 0;
1383 // ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708
1384 // fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 );
1385 // fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] );
1386 if( constraint == 2 )
1387 {
1388 if( alg == 'M' )
1389 {
1390 fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" );
1391 exit( 1 );
1392 }
1393 fprintf( stderr, "c" );
1394 if( alg == 'A' )
1395 {
1396 imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
1397 if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
1398 pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1399 }
1400 else if( alg == 'Q' )
1401 {
1402 fprintf( stderr, "Q has been disabled.\n" );
1403 exit( 1 );
1404 }
1405 }
1406 else if( force_fft || ( use_fft && ffttry ) )
1407 {
1408 fprintf( stderr, "f" );
1409 if( alg == 'M' )
1410 {
1411 fprintf( stderr, "m" );
1412 pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
1413 }
1414 else
1415 pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
1416 }
1417 else
1418 {
1419 fprintf( stderr, "d" );
1420 fftlog[m1] = 0;
1421 switch( alg )
1422 {
1423 case( 'a' ):
1424 pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
1425 break;
1426 case( 'M' ):
1427 fprintf( stderr, "m" );
1428 pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1429 break;
1430 case( 'A' ):
1431 pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
1432 break;
1433 default:
1434 ErrorExit( "ERROR IN SOURCE FILE" );
1435 }
1436 }
1437
1438
1439 nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
1440
1441 // fprintf( stderr, "aseq[last] = %s\n", aseq[nseq-1] );
1442
1443 #if SCOREOUT
1444 fprintf( stderr, "score = %10.2f\n", pscore );
1445 #endif
1446 tscore += pscore;
1447 #if 0 // New gaps = '='
1448 fprintf( stderr, "Original msa\n" );
1449 for( i=0; i<clus1; i++ )
1450 fprintf( stderr, "%s\n", mseq1[i] );
1451 fprintf( stderr, "Query\n" );
1452 for( i=0; i<clus2; i++ )
1453 fprintf( stderr, "%s\n", mseq2[i] );
1454 #endif
1455
1456 // writePre( nseq, name, nlen, aseq, 0 );
1457
1458 if( disp ) display( aseq, nseq );
1459
1460 if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara.
1461 {
1462 // if( deleteadditionalinsertions ) ndeleted += deletenewinsertions( clus2, clus1, mseq2, mseq1, deleterecord );
1463 adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] );
1464 restorecommongaps( nseq, nseq-(clus1+clus2), aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
1465 findnewgaps( clus2, 0, mseq2, gaplen );
1466 insertnewgaps( nseq, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg, '-' );
1467 // for( i=0; i<nseq; i++ ) eq2dash( aseq[i] );
1468 for( i=0; (m=topol[l][0][i])>-1; i++ ) alreadyaligned[m] = 1;
1469 }
1470 if( mergeoralign[l] == '2' )
1471 {
1472 // if( deleteadditionalinsertions ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, deleterecord );
1473 adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] );
1474 restorecommongaps( nseq, nseq-(clus1+clus2), aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
1475 findnewgaps( clus1, 0, mseq1, gaplen );
1476 insertnewgaps( nseq, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' );
1477 // for( i=0; i<nseq; i++ ) eq2dash( aseq[i] );
1478 for( i=0; (m=topol[l][1][i])>-1; i++ ) alreadyaligned[m] = 1;
1479 }
1480
1481 #if 0
1482 free( topol[l][0] );
1483 free( topol[l][1] );
1484 free( topol[l] );
1485 #endif
1486 }
1487
1488 //for( i=0; i<nseq; nseq++ )
1489 //reporterr( "In treebase() before deletenewinsertions, %s\n", aseq[i] );
1490 // if( keeplength ) ndeleted += deletenewinsertions_withoutusingequal( nseq-1, 1, 0, aseq, aseq+nseq-1, NULL, deletemapiadd, deletelagiadd, deletelistiadd );
1491
1492 #if SCOREOUT
1493 fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
1494 #endif
1495 free( gaplen );
1496 free( gapmap );
1497 if( rnakozo && rnaprediction == 'm' )
1498 {
1499 free( grouprna1 );
1500 free( grouprna2 );
1501 }
1502 free( fftlog ); // iranai
1503 free( effarr1 );
1504 free( effarr2 );
1505 free( indication1 );
1506 free( indication2 );
1507 free( alreadyaligned );
1508 if( constraint )
1509 {
1510 for( i=0; i<nseq; i++ ) free( localhomshrink[i] ); // ??
1511 free( localhomshrink );
1512 }
1513 free( effarr1_kozo );
1514 free( effarr2_kozo );
1515
1516
1517 return( pscore );
1518 }
1519
1520
1521
1522
mtxcpy(int norg,int njobc,double *** iscorec,double ** iscore)1523 static void mtxcpy( int norg, int njobc, double ***iscorec, double **iscore )
1524 {
1525 int i, nlim, n;
1526 double *fpt, *fptc;
1527
1528 *iscorec = AllocateFloatHalfMtx( njobc );
1529 nlim = norg-1;
1530 for( i=0; i<nlim; i++ )
1531 {
1532 fptc = (*iscorec)[i]+1;
1533 fpt = iscore[i]+1;
1534 n = norg-i-1;
1535 while( n-- )
1536 *fptc++ = *fpt++;
1537 // for( j=i+1; j<norg; j++ )
1538 // (*iscorec)[i][j-i] = iscore[i][j-i];
1539 }
1540 }
1541
1542
addsinglethread(void * arg)1543 static void *addsinglethread( void *arg )
1544 {
1545 thread_arg_t *targ = (thread_arg_t *)arg;
1546 int *nlenc = NULL;
1547 char **namec = NULL;
1548 Treedep *depc = NULL;
1549 char **mseq1 = NULL, **mseq2 = NULL;
1550 double **iscorec;
1551 // double **iscorecbk; // to speedup
1552 double *effc = NULL;
1553 int ***topolc = NULL;
1554 double **lenc = NULL;
1555 LocalHom **localhomtablec = NULL;
1556 int *memlist0 = NULL;
1557 int *memlist1 = NULL;
1558 int *addmem = NULL;
1559 int njobc, norg;
1560 char **bseq = NULL;
1561 int i, j, k, m, iadd, rep, neighbor;
1562 char *mergeoralign = NULL;
1563 int *nogaplenjusttodecideaddhereornot = NULL;
1564 char *tmpseq = NULL;
1565
1566 #ifdef enablemultithread
1567 int thread_no = targ->thread_no;
1568 int *iaddshare = targ->iaddshare;
1569 #endif
1570 int njob = targ->njob;
1571 int *follows = targ->follows;
1572 int nadd = targ->nadd;
1573 int *nlen = targ->nlen;
1574 char **name = targ->name;
1575 char **seq = targ->seq;
1576 LocalHom **localhomtable = targ->localhomtable;
1577 double **iscore = targ->iscore;
1578 double **nscore = targ->nscore;
1579 int *istherenewgap = targ->istherenewgap;
1580 int **newgaplist = targ->newgaplist;
1581 RNApair ***singlerna = targ->singlerna;
1582 double *eff_kozo_mapped = targ->eff_kozo_mapped;
1583 int alloclen = targ->alloclen;
1584 Treedep *dep = targ->dep;
1585 int ***topol = targ->topol;
1586 double **len = targ->len;
1587 Addtree *addtree = targ->addtree;
1588 int **deletelist = targ->deletelist;
1589 double pscore;
1590 int *alnleninnode = NULL;
1591 char *targetseq;
1592
1593
1594
1595 // fprintf( stderr, "\nPreparing thread %d\n", thread_no );
1596 norg = njob - nadd;
1597 njobc = norg+1;
1598
1599 alnleninnode = AllocateIntVec( norg );
1600 addmem = AllocateIntVec( nadd+1 );
1601 depc = (Treedep *)calloc( njobc, sizeof( Treedep ) );
1602 mseq1 = AllocateCharMtx( njob, 0 );
1603 mseq2 = AllocateCharMtx( njob, 0 );
1604 bseq = AllocateCharMtx( njobc, alloclen );
1605 namec = AllocateCharMtx( njob, 0 );
1606 nlenc = AllocateIntVec( njob );
1607 mergeoralign = AllocateCharVec( njob );
1608 nogaplenjusttodecideaddhereornot = AllocateIntVec( njobc );
1609 tmpseq = calloc( alloclen, sizeof( char ) );
1610
1611 if( allowlongadds ) // hontou ha iranai.
1612 {
1613 for( i=0; i<njobc; i++ ) nogaplenjusttodecideaddhereornot[i] = 0;
1614 }
1615 else
1616 {
1617 for( i=0; i<norg; i++ )
1618 {
1619 gappick0( tmpseq, seq[i] );
1620 nogaplenjusttodecideaddhereornot[i] = strlen( tmpseq );
1621 }
1622 }
1623
1624 for( i=0; i<norg; i++ ) strcpy( bseq[i], seq[i] );
1625 if( norg == 1 )
1626 {
1627 alnleninnode[0] = strlen( bseq[0] );
1628 }
1629 else
1630 {
1631 for( i=norg-2; i>=0; i-- )
1632 // for( i=norg-2; i; i-- ) // BUG!!!!
1633 {
1634 // reporterr( "\nstep %d\n", i );
1635 k = 0;
1636 for( j=0; (m=topol[i][0][j])!=-1; j++ )
1637 {
1638 mseq1[k++] = bseq[m];
1639 // reporterr( "%d ", m );
1640 }
1641 for( j=0; (m=topol[i][1][j])!=-1; j++ )
1642 {
1643 mseq1[k++] = bseq[m];
1644 // reporterr( "%d ", m );
1645 }
1646 // reporterr( "\n" );
1647 commongappick( k, mseq1 );
1648 alnleninnode[i] = strlen( mseq1[0] );
1649 // fprintf( stderr, "alnleninnode[%d] = %d\n", i, alnleninnode[i] );
1650 }
1651 }
1652 // for( i=0; i<norg-1; i++ )
1653 // fprintf( stderr, "alnleninnode[%d] = %d\n", i, alnleninnode[i] );
1654
1655
1656 if( constraint )
1657 {
1658 localhomtablec = (LocalHom **)calloc( njobc, sizeof( LocalHom *) ); // motto chiisaku dekiru.
1659 #if SMALLMEMORY
1660 if( multidist )
1661 {
1662 for( i=0; i<njobc; i++) localhomtablec[i] = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // motto chiisaku dekiru.
1663 }
1664 else
1665 #endif
1666 {
1667 for( i=0; i<njobc; i++) localhomtablec[i] = (LocalHom *)calloc( njobc, sizeof( LocalHom ) ); // motto chiisaku dekiru.
1668 for( i=0; i<norg; i++ ) for( j=0; j<norg; j++ ) localhomtablec[i][j] = localhomtable[i][j]; // iru!
1669 }
1670 }
1671
1672
1673 topolc = AllocateIntCub( njobc, 2, 0 );
1674 lenc = AllocateFloatMtx( njobc, 2 );
1675 effc = AllocateDoubleVec( njobc );
1676 // for( i=0; i<norg; i++ ) nlenc[i] = strlen( seq[i] );
1677 for( i=0; i<norg; i++ ) nlenc[i] = nlen[i];
1678 for( i=0; i<norg; i++ ) namec[i] = name[i];
1679 memlist0 = AllocateIntVec( norg+1 );
1680 memlist1 = AllocateIntVec( 2 );
1681 for( i=0; i<norg; i++ ) memlist0[i] = i;
1682 memlist0[norg] = -1;
1683
1684 // fprintf( stderr, "\ndone. %d\n", thread_no );
1685
1686 // mtxcpy( norg, norg, &iscorecbk, iscore ); // to speedup?
1687
1688
1689 iadd = -1;
1690 while( 1 )
1691 {
1692 #ifdef enablemultithread
1693 if( nthread )
1694 {
1695 pthread_mutex_lock( targ->mutex_counter );
1696 iadd = *iaddshare;
1697 if( iadd == nadd )
1698 {
1699 pthread_mutex_unlock( targ->mutex_counter );
1700 break;
1701 }
1702 fprintf( stderr, "\r%d / %d (thread %d) \r", iadd, nadd, thread_no );
1703 ++(*iaddshare);
1704 targetseq = seq[norg+iadd];
1705 pthread_mutex_unlock( targ->mutex_counter );
1706 }
1707 else
1708 #endif
1709 {
1710 iadd++;
1711 if( iadd == nadd ) break;
1712 targetseq = seq[norg+iadd];
1713 fprintf( stderr, "\r%d / %d \r", iadd, nadd );
1714 }
1715
1716 for( i=0; i<norg; i++ ) strcpy( bseq[i], seq[i] );
1717 // gappick0( bseq[norg], seq[norg+iadd] );
1718 gappick0( bseq[norg], targetseq );
1719
1720 if( allowlongadds ) // missed in v7.220
1721 nogaplenjusttodecideaddhereornot[norg] = 0;
1722 else
1723 nogaplenjusttodecideaddhereornot[norg] = strlen( bseq[norg] );
1724
1725 mtxcpy( norg, njobc, &iscorec, iscore );
1726
1727 if( multidist || tuplesize > 0 )
1728 {
1729 for( i=0; i<norg; i++ ) iscorec[i][norg-i] = nscore[i][iadd];
1730 }
1731 else
1732 {
1733 for( i=0; i<norg; i++ ) iscorec[i][norg-i] = iscore[i][norg+iadd-i];
1734 }
1735
1736
1737 #if 0
1738 for( i=0; i<njobc-1; i++ )
1739 {
1740 fprintf( stderr, "i=%d\n", i );
1741 for( j=i+1; j<njobc; j++ )
1742 {
1743 fprintf( stderr, "%d-%d, %f\n", i, j, iscorec[i][j-i] );
1744 }
1745 }
1746 #endif
1747 nlenc[norg] = nlen[norg+iadd];
1748 namec[norg] = name[norg+iadd];
1749 if( constraint)
1750 {
1751 for( i=0; i<norg; i++ )
1752 {
1753 #if SMALLMEMORY
1754 if( multidist )
1755 {
1756 localhomtablec[i][0] = localhomtable[i][iadd];
1757 // localhomtablec[norg][i] = localhomtable[norg+iadd][i];
1758 }
1759 else
1760 #endif
1761 {
1762 localhomtablec[i][norg] = localhomtable[i][norg+iadd];
1763 localhomtablec[norg][i] = localhomtable[norg+iadd][i];
1764 }
1765 }
1766 // localhomtablec[norg][norg] = localhomtable[norg+iadd][norg+iadd]; // iranai!!
1767 }
1768
1769 // fprintf( stderr, "Constructing a UPGMA tree %d ... ", iadd );
1770 // fflush( stderr );
1771
1772
1773 // if( iadd == 0 )
1774 // {
1775 // }
1776 // fixed_musclesupg_double_realloc_nobk_halfmtx( njobc, iscorec, topolc, lenc, depc, 0, 1 );
1777 neighbor = addonetip( njobc, topolc, lenc, iscorec, topol, len, dep, treeout, addtree, iadd, name, alnleninnode, nogaplenjusttodecideaddhereornot, noalign );
1778
1779
1780 FreeFloatHalfMtx( iscorec, njobc );
1781
1782
1783 if( tbrweight )
1784 {
1785 weight = 3;
1786 counteff_simple_double_nostatic( njobc, topolc, lenc, effc );
1787 }
1788 else
1789 {
1790 for( i=0; i<njobc; i++ ) effc[i] = 1.0;
1791 }
1792
1793 // FreeFloatMtx( lenc );
1794
1795 if( noalign ) // nen no tame weight wo keisan.
1796 {
1797 // FreeFloatHalfMtx( iscorec, njobc ); // saki ni continue suru baai ha fukkatsu.
1798 continue;
1799 }
1800
1801 // reporterr( "iadd = %d\n", iadd );
1802
1803 #if 0
1804 for( i=0; i<njobc-1; i++ )
1805 {
1806 fprintf( stderr, "\n step %d\n", i );
1807 fprintf( stderr, "topol[%d] = \n", i );
1808 for( j=0; topolc[i][0][j]!=-1; j++ ) fprintf( stderr, "%d ", topolc[i][0][j] );
1809 fprintf( stderr, "\n" );
1810 fprintf( stderr, "len=%f\n", lenc [i][0] );
1811 for( j=0; topolc[i][1][j]!=-1; j++ ) fprintf( stderr, "%d ", topolc[i][1][j] );
1812 fprintf( stderr, "\n" );
1813 fprintf( stderr, "len=%f\n", lenc [i][1] );
1814 }
1815
1816 fprintf( stderr, "\nneighbor = %d, iadd = %d\n", neighbor, iadd );
1817 #endif
1818 follows[iadd] = neighbor;
1819
1820 for( i=0; i<njobc-1; i++ ) mergeoralign[i] = 'n';
1821 for( j=njobc-1; j<njobc; j++ )
1822 {
1823 addmem[0] = j;
1824 addmem[1] = -1;
1825 for( i=0; i<njobc-1; i++ )
1826 {
1827 if( samemembern( topolc[i][0], addmem, 1 ) ) // arieru
1828 {
1829 // fprintf( stderr, "HIT!\n" );
1830 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
1831 else mergeoralign[i] = '1';
1832 }
1833 else if( samemembern( topolc[i][1], addmem, 1 ) )
1834 {
1835 // fprintf( stderr, "HIT!\n" );
1836 if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
1837 else mergeoralign[i] = '2';
1838 }
1839 }
1840 }
1841
1842 // for( i=0; i<1; i++ ) addmem[i] = njobc-1+i;
1843 addmem[0] = njobc-1;
1844 addmem[1] = -1;
1845 for( i=0; i<njobc-1; i++ )
1846 {
1847 if( includemember( topolc[i][0], addmem ) && includemember( topolc[i][1], addmem ) )
1848 {
1849 mergeoralign[i] = 'w';
1850 }
1851 else if( includemember( topolc[i][0], addmem ) )
1852 {
1853 mergeoralign[i] = '1';
1854 // fprintf( stderr, "HIT 1! iadd=%d", iadd );
1855 }
1856 else if( includemember( topolc[i][1], addmem ) )
1857 {
1858 mergeoralign[i] = '2';
1859 // fprintf( stderr, "HIT 2! iadd=%d", iadd );
1860 }
1861 }
1862 #if 0
1863 for( i=0; i<njob-1; i++ )
1864 {
1865 fprintf( stderr, "mem0 = " );
1866 for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][0][j] );
1867 fprintf( stderr, "\n" );
1868 fprintf( stderr, "mem1 = " );
1869 for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][1][j] );
1870 fprintf( stderr, "\n" );
1871 fprintf( stderr, "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] );
1872 }
1873 #endif
1874
1875
1876 #if 0
1877 for( i=0; i<norg; i++ ) fprintf( stderr, "seq[%d, iadd=%d] = \n%s\n", i, iadd, seq[i] );
1878 fprintf( stderr, "gapmapS (iadd=%d) = \n", iadd );
1879 for( i=0; i<lennocommongap; i++ ) fprintf( stderr, "%d\n", gapmapS[i] );
1880 #endif
1881
1882
1883 // fprintf( stderr, "Progressive alignment ... \r" );
1884
1885 #if 0
1886 pthread_mutex_lock( targ->mutex_counter );
1887 fprintf( stdout, "\nmergeoralign (iadd=%d) = ", iadd );
1888 for( i=0; i<njobc-1; i++ ) fprintf( stdout, "%c", mergeoralign[i] );
1889 fprintf( stdout, "\n" );
1890 pthread_mutex_unlock( targ->mutex_counter );
1891 #endif
1892
1893 singlerna = NULL;
1894 pscore = treebase( njobc, nlenc, bseq, 1, mergeoralign, mseq1, mseq2, topolc, effc, &alloclen, localhomtablec, singlerna, eff_kozo_mapped );
1895 #if 0
1896 pthread_mutex_lock( targ->mutex_counter );
1897 // fprintf( stdout, "res (iadd=%d) = %s, pscore=%f\n", iadd, bseq[norg], pscore );
1898 // fprintf( stdout, "effc (iadd=%d) = ", iadd );
1899 // for( i=0; i<njobc; i++ ) fprintf( stdout, "%f ", effc[i] );
1900 // fprintf( stdout, "\n" );
1901 pthread_mutex_unlock( targ->mutex_counter );
1902 #endif
1903
1904
1905 #if 0
1906 fprintf( trap_g, "done.\n" );
1907 fclose( trap_g );
1908 #endif
1909 // fprintf( stdout, "\n>seq[%d, iadd=%d] = \n%s\n", norg+iadd, iadd, seq[norg+iadd] );
1910 // fprintf( stdout, "\n>bseq[%d, iadd=%d] = \n%s\n", norg, iadd, bseq[norg] );
1911
1912 // strcpy( seq[norg+iadd], bseq[norg] );
1913
1914
1915 if( keeplength )
1916 {
1917 // reporterr( "deletelist = %p\n", deletelist );
1918 // reporterr( "deletelist+iadd = %p\n", deletelist+iadd );
1919 ndeleted += deletenewinsertions_whole_eq( norg, 1, bseq, bseq+norg, deletelist+iadd );
1920 // for( i=0; i<norg+1; i++ ) reporterr( ">\n%s\n", bseq[i] );
1921 strcpy( targetseq, bseq[norg] );
1922 i = norg; // no new gap!!
1923 }
1924 else
1925 {
1926 strcpy( targetseq, bseq[norg] );
1927 rep = -1;
1928 for( i=0; i<norg; i++ )
1929 {
1930 // fprintf( stderr, "Checking %d/%d\n", i, norg );
1931 if( strchr( bseq[i], '=' ) ) break;
1932 }
1933 }
1934
1935 if( i == norg )
1936 istherenewgap[iadd] = 0;
1937 else
1938 {
1939 rep = i;
1940 istherenewgap[iadd] = 1;
1941
1942
1943 makenewgaplist( newgaplist[iadd], bseq[rep] );
1944 // for( i=0; newgaplist[iadd][i]!=-1; i++ ) fprintf( stderr, "%d: %d\n", i, newgaplist[iadd][i] );
1945 }
1946 eq2dash( targetseq );
1947
1948 }
1949
1950
1951 #if 1
1952 if( constraint && localhomtablec )
1953 {
1954 for( i=0; i<njobc; i++ ) free( localhomtablec[i] );
1955 free( localhomtablec );
1956 localhomtablec = NULL;
1957 }
1958 if( mergeoralign ) free( mergeoralign ); mergeoralign = NULL;
1959 if( nogaplenjusttodecideaddhereornot ) free( nogaplenjusttodecideaddhereornot ); nogaplenjusttodecideaddhereornot = NULL;
1960 if( alnleninnode ) free( alnleninnode ); alnleninnode = NULL;
1961 if( tmpseq ) free( tmpseq ); tmpseq = NULL;
1962 if( bseq ) FreeCharMtx( bseq ); bseq = NULL;
1963 if( namec ) free( namec ); namec = NULL;
1964 if( nlenc ) free( nlenc ); nlenc = NULL;
1965 if( depc ) free( depc ); depc = NULL;
1966 if( topolc ) FreeIntCub( topolc ); topolc = NULL;
1967 if( lenc ) FreeFloatMtx( lenc ); lenc = NULL;
1968 if( effc ) FreeDoubleVec( effc ); effc = NULL;
1969 if( memlist0 ) free( memlist0 ); memlist0 = NULL;
1970 if( memlist1 ) free( memlist1 ); memlist1 = NULL;
1971 if( addmem ) free( addmem ); addmem = NULL;
1972 if( mseq1 ) free( mseq1 ); mseq1 = NULL;
1973 if( mseq2 ) free( mseq2 ); mseq2 = NULL;
1974 Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
1975 A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
1976 if( commonIP ) FreeIntMtx( commonIP );
1977 commonIP = NULL;
1978 commonAlloc1 = commonAlloc2 = 0;
1979 #endif
1980 // FreeFloatHalfMtx( iscorecbk, norg );
1981
1982 return( NULL );
1983 }
1984
1985 static int maxl;
1986 static int tsize;
1987 static int nunknown = 0;
1988
seq_grp_nuc(int * grp,char * seq)1989 void seq_grp_nuc( int *grp, char *seq )
1990 {
1991 int tmp;
1992 int *grpbk = grp;
1993 while( *seq )
1994 {
1995 tmp = amino_grp[(int)*seq++];
1996 if( tmp < 4 )
1997 *grp++ = tmp;
1998 else
1999 nunknown++;
2000 }
2001 *grp = END_OF_VEC;
2002 if( grp - grpbk < tuplesize )
2003 {
2004 // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
2005 // exit( 1 );
2006 *grpbk = -1;
2007 }
2008 }
2009
seq_grp(int * grp,char * seq)2010 void seq_grp( int *grp, char *seq )
2011 {
2012 int tmp;
2013 int *grpbk = grp;
2014 while( *seq )
2015 {
2016 tmp = amino_grp[(int)*seq++];
2017 if( tmp < 6 )
2018 *grp++ = tmp;
2019 else
2020 nunknown++;
2021 }
2022 *grp = END_OF_VEC;
2023 if( grp - grpbk < 6 )
2024 {
2025 // fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
2026 // exit( 1 );
2027 *grpbk = -1;
2028 }
2029 }
2030
makecompositiontable_p(int * table,int * pointt)2031 void makecompositiontable_p( int *table, int *pointt )
2032 {
2033 int point;
2034
2035 while( ( point = *pointt++ ) != END_OF_VEC )
2036 table[point]++;
2037 }
2038
commonsextet_p(int * table,int * pointt)2039 int commonsextet_p( int *table, int *pointt )
2040 {
2041 int value = 0;
2042 int tmp;
2043 int point;
2044 static TLS int *memo = NULL;
2045 static TLS int *ct = NULL;
2046 int *cp;
2047
2048 if( table == NULL )
2049 {
2050 if( memo ) free( memo );
2051 if( ct ) free( ct );
2052 return( 0 );
2053 }
2054
2055 if( *pointt == -1 )
2056 return( 0 );
2057
2058 if( !memo )
2059 {
2060 memo = (int *)calloc( tsize, sizeof( int ) );
2061 if( !memo ) ErrorExit( "Cannot allocate memo\n" );
2062 ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); // chuui!!
2063 if( !ct ) ErrorExit( "Cannot allocate ct\n" );
2064 }
2065
2066 cp = ct;
2067 while( ( point = *pointt++ ) != END_OF_VEC )
2068 {
2069 tmp = memo[point]++;
2070 if( tmp < table[point] )
2071 value++;
2072 if( tmp == 0 ) *cp++ = point;
2073 }
2074 *cp = END_OF_VEC;
2075
2076 cp = ct;
2077 while( *cp != END_OF_VEC )
2078 memo[*cp++] = 0;
2079
2080 return( value );
2081 }
2082
makepointtable_nuc_dectet(int * pointt,int * n)2083 void makepointtable_nuc_dectet( int *pointt, int *n )
2084 {
2085 int point;
2086 register int *p;
2087
2088 if( *n == -1 )
2089 {
2090 *pointt = -1;
2091 return;
2092 }
2093
2094 p = n;
2095 point = *n++ *262144;
2096 point += *n++ * 65536;
2097 point += *n++ * 16384;
2098 point += *n++ * 4096;
2099 point += *n++ * 1024;
2100 point += *n++ * 256;
2101 point += *n++ * 64;
2102 point += *n++ * 16;
2103 point += *n++ * 4;
2104 point += *n++;
2105 *pointt++ = point;
2106
2107 while( *n != END_OF_VEC )
2108 {
2109 point -= *p++ *262144;
2110 point *= 4;
2111 point += *n++;
2112 *pointt++ = point;
2113
2114 }
2115 *pointt = END_OF_VEC;
2116 }
2117
makepointtable_nuc_octet(int * pointt,int * n)2118 void makepointtable_nuc_octet( int *pointt, int *n )
2119 {
2120 int point;
2121 register int *p;
2122
2123 if( *n == -1 )
2124 {
2125 *pointt = -1;
2126 return;
2127 }
2128
2129 p = n;
2130 point = *n++ * 16384;
2131 point += *n++ * 4096;
2132 point += *n++ * 1024;
2133 point += *n++ * 256;
2134 point += *n++ * 64;
2135 point += *n++ * 16;
2136 point += *n++ * 4;
2137 point += *n++;
2138 *pointt++ = point;
2139
2140 while( *n != END_OF_VEC )
2141 {
2142 point -= *p++ * 16384;
2143 point *= 4;
2144 point += *n++;
2145 *pointt++ = point;
2146 }
2147 *pointt = END_OF_VEC;
2148 }
2149
makepointtable_nuc(int * pointt,int * n)2150 void makepointtable_nuc( int *pointt, int *n )
2151 {
2152 int point;
2153 register int *p;
2154
2155 if( *n == -1 )
2156 {
2157 *pointt = -1;
2158 return;
2159 }
2160
2161 p = n;
2162 point = *n++ * 1024;
2163 point += *n++ * 256;
2164 point += *n++ * 64;
2165 point += *n++ * 16;
2166 point += *n++ * 4;
2167 point += *n++;
2168 *pointt++ = point;
2169
2170 while( *n != END_OF_VEC )
2171 {
2172 point -= *p++ * 1024;
2173 point *= 4;
2174 point += *n++;
2175 *pointt++ = point;
2176 }
2177 *pointt = END_OF_VEC;
2178 }
2179
makepointtable(int * pointt,int * n)2180 void makepointtable( int *pointt, int *n )
2181 {
2182 int point;
2183 register int *p;
2184
2185 if( *n == -1 )
2186 {
2187 *pointt = -1;
2188 return;
2189 }
2190
2191 p = n;
2192 point = *n++ * 7776;
2193 point += *n++ * 1296;
2194 point += *n++ * 216;
2195 point += *n++ * 36;
2196 point += *n++ * 6;
2197 point += *n++;
2198 *pointt++ = point;
2199
2200 while( *n != END_OF_VEC )
2201 {
2202 point -= *p++ * 7776;
2203 point *= 6;
2204 point += *n++;
2205 *pointt++ = point;
2206 }
2207 *pointt = END_OF_VEC;
2208 }
2209
2210 #ifdef enablemultithread
2211
dndprethread(void * arg)2212 void *dndprethread( void *arg )
2213 {
2214 dndprethread_arg_t *targ = (dndprethread_arg_t *)arg;
2215 int njob = targ->njob;
2216 int thread_no = targ->thread_no;
2217 double *selfscore = targ->selfscore;
2218 double **mtx = targ->mtx;
2219 char **seq = targ->seq;
2220 Jobtable2d *jobpospt = targ->jobpospt;
2221
2222 int i, j;
2223 double ssi, ssj, bunbo;
2224 double mtxv;
2225
2226 if( njob == 1 ) return( NULL );
2227
2228 while( 1 )
2229 {
2230 pthread_mutex_lock( targ->mutex );
2231 j = jobpospt->j;
2232 i = jobpospt->i;
2233 j++;
2234 // fprintf( stderr, "\n i=%d, j=%d before check\n", i, j );
2235 if( j == njob )
2236 {
2237 // fprintf( stderr, "\n j = %d, i = %d, njob = %d\n", j, i, njob );
2238 fprintf( stderr, "%4d/%4d (thread %4d), dndpre\r", i+1, njob, thread_no );
2239 i++;
2240 j = i + 1;
2241 if( i == njob-1 )
2242 {
2243 // fprintf( stderr, "\n i=%d, njob-1=%d\n", i, njob-1 );
2244 pthread_mutex_unlock( targ->mutex );
2245 return( NULL );
2246 }
2247 }
2248 // fprintf( stderr, "\n i=%d, j=%d after check\n", i, j );
2249 jobpospt->j = j;
2250 jobpospt->i = i;
2251 pthread_mutex_unlock( targ->mutex );
2252
2253 ssi = selfscore[i];
2254 ssj = selfscore[j];
2255
2256 bunbo = MIN( ssi, ssj );
2257 if( bunbo == 0.0 )
2258 mtxv = maxdist;
2259 else
2260 mtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty * 10 ) / bunbo );
2261 #if 1
2262 if( mtxv > 9.0 || mtxv < 0.0 )
2263 {
2264 fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv );
2265 exit( 1 );
2266 }
2267 #else // CHUUI!!! 2012/05/16
2268 if( mtxv > 2.0 )
2269 {
2270 mtxv = 2.0;
2271 }
2272 if( mtxv < 0.0 )
2273 {
2274 fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv );
2275 exit( 1 );
2276 }
2277 #endif
2278 mtx[i][j-i] = mtxv;
2279 }
2280 }
2281
gaplist2alnxthread(void * arg)2282 static void *gaplist2alnxthread( void *arg )
2283 {
2284 gaplist2alnxthread_arg_t *targ = (gaplist2alnxthread_arg_t *)arg;
2285 // int thread_no = targ->thread_no;
2286 int ncycle = targ->ncycle;
2287 char **seq = targ->seq;
2288 int *newgaplist = targ->newgaplist;
2289 int *posmap = targ->posmap;
2290 int *jobpospt = targ->jobpospt;
2291 int tmpseqlen = targ->tmpseqlen;
2292 int lenfull = targ->lenfull;
2293 char *tmpseq1;
2294 int i;
2295
2296 tmpseq1 = AllocateCharVec( tmpseqlen );
2297
2298 while( 1 )
2299 {
2300 pthread_mutex_lock( targ->mutex );
2301 i = *jobpospt;
2302 if( i == ncycle )
2303 {
2304 pthread_mutex_unlock( targ->mutex );
2305 free( tmpseq1 );
2306 return( NULL );
2307 }
2308 *jobpospt = i+1;
2309 pthread_mutex_unlock( targ->mutex );
2310
2311 gaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist, posmap, tmpseqlen );
2312 // fprintf( stderr, ">%s (iadd=%d)\n%s\n", name[i], iadd, tmpseq1 );
2313 strcpy( seq[i], tmpseq1 );
2314 }
2315 }
2316
distancematrixthread(void * arg)2317 static void *distancematrixthread( void *arg )
2318 {
2319 distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
2320 int thread_no = targ->thread_no;
2321 int njob = targ->njob;
2322 int norg = targ->norg;
2323 int *jobpospt = targ->jobpospt;
2324 int **pointt = targ->pointt;
2325 double **imtx = targ->imtx;
2326 double **nmtx = targ->nmtx;
2327 double *selfscore = targ->selfscore;
2328 int *nogaplen = targ->nogaplen;
2329
2330 double lenfac, bunbo, longer, shorter, mtxv;
2331 int *table1;
2332 int i, j;
2333
2334 while( 1 )
2335 {
2336 pthread_mutex_lock( targ->mutex );
2337 i = *jobpospt;
2338 if( i == norg )
2339 {
2340 pthread_mutex_unlock( targ->mutex );
2341 commonsextet_p( NULL, NULL );
2342 return( NULL );
2343 }
2344 *jobpospt = i+1;
2345 pthread_mutex_unlock( targ->mutex );
2346
2347 table1 = (int *)calloc( tsize, sizeof( int ) );
2348 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2349 if( i % 100 == 0 )
2350 {
2351 fprintf( stderr, "\r% 5d / %d (thread %4d)", i+1, norg, thread_no );
2352 }
2353 makecompositiontable_p( table1, pointt[i] );
2354
2355 for( j=i+1; j<njob; j++ )
2356 {
2357 mtxv = (double)commonsextet_p( table1, pointt[j] );
2358 if( nogaplen[i] > nogaplen[j] )
2359 {
2360 longer=(double)nogaplen[i];
2361 shorter=(double)nogaplen[j];
2362 }
2363 else
2364 {
2365 longer=(double)nogaplen[j];
2366 shorter=(double)nogaplen[i];
2367 }
2368 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
2369 bunbo = MIN( selfscore[i], selfscore[j] );
2370
2371 if( j < norg )
2372 {
2373 if( bunbo == 0.0 )
2374 imtx[i][j-i] = maxdist;
2375 else
2376 imtx[i][j-i] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac;
2377
2378 }
2379 else
2380 {
2381 if( bunbo == 0.0 )
2382 nmtx[i][j-norg] = maxdist;
2383 else
2384 nmtx[i][j-norg] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac;
2385 }
2386 }
2387 free( table1 );
2388
2389 // for( j=i+1; j<norg; j++ )
2390 // imtx[i][j-i] = (double)commonsextet_p( table1, pointt[j] );
2391 // for( j=norg; j<njob; j++ )
2392 // nmtx[i][j-norg] = (double)commonsextet_p( table1, pointt[j] );
2393 // free( table1 );
2394 }
2395 }
2396 #endif
2397
2398
ktupledistancematrix(int nseq,int norg,int nlenmax,char ** seq,char ** name,double ** imtx,double ** nmtx)2399 void ktupledistancematrix( int nseq, int norg, int nlenmax, char **seq, char **name, double **imtx, double **nmtx )
2400 {
2401 char *tmpseq;
2402 int *grpseq;
2403 int **pointt;
2404 int i, j;
2405 int *nogaplen;
2406 int *table1;
2407 double lenfac, bunbo, longer, shorter, mtxv;
2408 double *selfscore;
2409 selfscore = AllocateFloatVec( nseq );
2410
2411 fprintf( stderr, "\n\nMaking a distance matrix ..\n" );
2412 fflush( stderr );
2413
2414 tmpseq = AllocateCharVec( nlenmax+1 );
2415 grpseq = AllocateIntVec( nlenmax+1 );
2416 pointt = AllocateIntMtx( nseq, nlenmax+1 );
2417 nogaplen = AllocateIntVec( nseq );
2418
2419 if( dorp == 'd' ) tsize = (int)pow( 4, tuplesize );
2420 else tsize = (int)pow( 6, 6 );
2421
2422 if( dorp == 'd' && tuplesize == 6 )
2423 {
2424 lenfaca = D6LENFACA;
2425 lenfacb = D6LENFACB;
2426 lenfacc = D6LENFACC;
2427 lenfacd = D6LENFACD;
2428 }
2429 else if( dorp == 'd' && tuplesize == 10 )
2430 {
2431 lenfaca = D10LENFACA;
2432 lenfacb = D10LENFACB;
2433 lenfacc = D10LENFACC;
2434 lenfacd = D10LENFACD;
2435 }
2436 else
2437 {
2438 lenfaca = PLENFACA;
2439 lenfacb = PLENFACB;
2440 lenfacc = PLENFACC;
2441 lenfacd = PLENFACD;
2442 }
2443
2444 maxl = 0;
2445 for( i=0; i<nseq; i++ )
2446 {
2447 gappick0( tmpseq, seq[i] );
2448 nogaplen[i] = strlen( tmpseq );
2449 if( nogaplen[i] < 6 )
2450 {
2451 // fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nogaplen[i] );
2452 // fprintf( stderr, "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" );
2453 // exit( 1 );
2454 }
2455 if( nogaplen[i] > maxl ) maxl = nogaplen[i];
2456 if( dorp == 'd' ) /* nuc */
2457 {
2458 seq_grp_nuc( grpseq, tmpseq );
2459 // makepointtable_nuc( pointt[i], grpseq );
2460 // makepointtable_nuc_octet( pointt[i], grpseq );
2461 if( tuplesize == 10 )
2462 makepointtable_nuc_dectet( pointt[i], grpseq );
2463 else if( tuplesize == 6 )
2464 makepointtable_nuc( pointt[i], grpseq );
2465 else
2466 {
2467 fprintf( stderr, "tuplesize=%d: not supported\n", tuplesize );
2468 exit( 1 );
2469 }
2470 }
2471 else /* amino */
2472 {
2473 seq_grp( grpseq, tmpseq );
2474 makepointtable( pointt[i], grpseq );
2475 }
2476
2477 }
2478 if( nunknown ) fprintf( stderr, "\nThere are %d ambiguous characters\n", nunknown );
2479
2480 for( i=0; i<nseq; i++ ) // serial de jubun
2481 {
2482 table1 = (int *)calloc( tsize, sizeof( int ) );
2483 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2484 makecompositiontable_p( table1, pointt[i] );
2485
2486 selfscore[i] = (double)commonsextet_p( table1, pointt[i] );
2487 free( table1 );
2488 }
2489
2490 #ifdef enablemultithread
2491 if( nthread > 0 )
2492 {
2493 distancematrixthread_arg_t *targ;
2494 int jobpos;
2495 pthread_t *handle;
2496 pthread_mutex_t mutex;
2497
2498 jobpos = 0;
2499 targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) );
2500 handle = calloc( nthread, sizeof( pthread_t ) );
2501 pthread_mutex_init( &mutex, NULL );
2502
2503 for( i=0; i<nthread; i++ )
2504 {
2505 targ[i].thread_no = i;
2506 targ[i].njob = nseq;
2507 targ[i].norg = norg;
2508 targ[i].jobpospt = &jobpos;
2509 targ[i].pointt = pointt;
2510 targ[i].imtx = imtx;
2511 targ[i].nmtx = nmtx;
2512 targ[i].selfscore = selfscore;
2513 targ[i].nogaplen = nogaplen;
2514 targ[i].mutex = &mutex;
2515
2516 pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) );
2517 }
2518
2519 for( i=0; i<nthread; i++ )
2520 {
2521 pthread_join( handle[i], NULL );
2522 }
2523 pthread_mutex_destroy( &mutex );
2524 free( handle );
2525 free( targ );
2526
2527 }
2528 else
2529 #endif
2530 {
2531 for( i=0; i<norg; i++ )
2532 {
2533 table1 = (int *)calloc( tsize, sizeof( int ) );
2534 if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
2535 if( i % 100 == 0 )
2536 {
2537 fprintf( stderr, "\r% 5d / %d", i+1, norg );
2538 fflush( stderr );
2539 }
2540 makecompositiontable_p( table1, pointt[i] );
2541
2542 for( j=i+1; j<nseq; j++ )
2543 {
2544 mtxv = (double)commonsextet_p( table1, pointt[j] );
2545 if( nogaplen[i] > nogaplen[j] )
2546 {
2547 longer=(double)nogaplen[i];
2548 shorter=(double)nogaplen[j];
2549 }
2550 else
2551 {
2552 longer=(double)nogaplen[j];
2553 shorter=(double)nogaplen[i];
2554 }
2555 lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
2556 bunbo = MIN( selfscore[i], selfscore[j] );
2557
2558 if( j < norg )
2559 {
2560 if( bunbo == 0.0 )
2561 imtx[i][j-i] = maxdist;
2562 else
2563 imtx[i][j-i] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac;
2564 }
2565 else
2566 {
2567 if( bunbo == 0.0 )
2568 nmtx[i][j-norg] = maxdist;
2569 else
2570 nmtx[i][j-norg] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac;
2571 }
2572 }
2573 free( table1 );
2574 }
2575 }
2576
2577 fprintf( stderr, "\ndone.\n\n" );
2578 fflush( stderr );
2579
2580 free( grpseq );
2581 free( tmpseq );
2582 FreeIntMtx( pointt );
2583 free( nogaplen );
2584 free( selfscore );
2585
2586
2587 if( hitout<0.0 )
2588 {
2589 fprintf( stdout, "Threshold=%f\n\n", -hitout );
2590 for( i=0; i<norg; i++ )
2591 {
2592 for( j=norg; j<nseq; j++ )
2593 {
2594 if( nmtx[i][j-norg] < -hitout )
2595 break;
2596 }
2597 if( j<nseq )
2598 {
2599 fprintf( stdout, "%s may be similar to:\n", name[i]+1 );
2600 for( j=norg; j<nseq; j++ )
2601 {
2602 if( nmtx[i][j-norg] < -hitout )
2603 fprintf( stdout, " %s, %f\n", name[j]+1, nmtx[i][j-norg] );
2604 }
2605 fprintf( stdout, "\n" );
2606 }
2607 }
2608 exit( 1 );
2609 }
2610 if( hitout>0.0 )
2611 {
2612 fprintf( stdout, "Threshold=%f\n\n", hitout );
2613 for( i=norg; i<nseq; i++ )
2614 {
2615 for( j=0; j<norg; j++ )
2616 {
2617 if( nmtx[j][i-norg] < hitout )
2618 break;
2619 }
2620 if( j<norg )
2621 {
2622 fprintf( stdout, "%s may be similar to:\n", name[i]+1 );
2623 for( j=0; j<norg; j++ )
2624 {
2625 if( nmtx[j][i-norg] < hitout )
2626 fprintf( stdout, " %s, %f\n", name[j]+1, nmtx[j][i-norg] );
2627 }
2628 fprintf( stdout, "\n" );
2629 }
2630 }
2631 exit( 1 );
2632 }
2633
2634 #if 0 // writehat2 wo kakinaosu
2635 if( distout )
2636 {
2637 hat2p = fopen( "hat2", "w" );
2638 WriteFloatHat2_pointer_halfmtx( hat2p, nseq, name, mtx );
2639 fclose( hat2p );
2640 }
2641 #endif
2642 }
2643
dndpre(int nseq,char ** seq,double ** mtx)2644 void dndpre( int nseq, char **seq, double **mtx ) // not used yet
2645 {
2646 int i, j, ilim;
2647 double *selfscore;
2648 double mtxv;
2649 double ssi, ssj, bunbo;
2650
2651 selfscore = AllocateFloatVec( nseq );
2652
2653 for( i=0; i<nseq; i++ )
2654 {
2655 selfscore[i] = (double)naivepairscore11( seq[i], seq[i], 0 );
2656 }
2657 #ifdef enablemultithread
2658 if( nthread > 0 )
2659 {
2660 dndprethread_arg_t *targ;
2661 Jobtable2d jobpos;
2662 pthread_t *handle;
2663 pthread_mutex_t mutex;
2664
2665 jobpos.i = 0;
2666 jobpos.j = 0;
2667
2668 targ = calloc( nthread, sizeof( dndprethread_arg_t ) );
2669 handle = calloc( nthread, sizeof( pthread_t ) );
2670 pthread_mutex_init( &mutex, NULL );
2671
2672 for( i=0; i<nthread; i++ )
2673 {
2674 targ[i].thread_no = i;
2675 targ[i].njob = nseq;
2676 targ[i].selfscore = selfscore;
2677 targ[i].mtx = mtx;
2678 targ[i].seq = seq;
2679 targ[i].jobpospt = &jobpos;
2680 targ[i].mutex = &mutex;
2681
2682 pthread_create( handle+i, NULL, dndprethread, (void *)(targ+i) );
2683 }
2684
2685 for( i=0; i<nthread; i++ )
2686 {
2687 pthread_join( handle[i], NULL );
2688 }
2689 pthread_mutex_destroy( &mutex );
2690
2691 }
2692 else
2693 #endif
2694 {
2695 ilim = nseq-1;
2696 for( i=0; i<ilim; i++ )
2697 {
2698 ssi = selfscore[i];
2699 fprintf( stderr, "%4d/%4d\r", i+1, nseq );
2700
2701 for( j=i+1; j<nseq; j++ )
2702 {
2703 ssj = selfscore[j];
2704 bunbo = MIN( ssi, ssj );
2705 if( bunbo == 0.0 )
2706 mtxv = maxdist;
2707 else
2708 mtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty * 10 ) / bunbo );
2709
2710 #if 1
2711 if( mtxv > 9.0 || mtxv < 0.0 )
2712 {
2713 fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv );
2714 exit( 1 );
2715 }
2716 #else // CHUUI!!! 2012/05/16
2717 if( mtxv > 2.0 )
2718 {
2719 mtxv = 2.0;
2720 }
2721 if( mtxv < 0.0 )
2722 {
2723 fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv );
2724 exit( 1 );
2725 }
2726 #endif
2727 mtx[i][j-i] = mtxv;
2728 }
2729 }
2730 }
2731
2732 #if TEST
2733 for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
2734 fprintf( stdout, "i=%d, j=%d, mtx[][] = %f\n", i, j, mtx[i][j] );
2735 #endif
2736 free( selfscore );
2737
2738 }
2739
searchlet(char * p1,char * p2)2740 static int searchlet( char *p1, char *p2 )
2741 {
2742 char *p;
2743 for( p=p1; p<=p2; p++ )
2744 {
2745 if( *p != '-' )
2746 {
2747 return( p-p1 );
2748 }
2749 }
2750 return( -1 );
2751 }
2752
smoothing2(int njob,int nadd,int lenfull,char ** seq,Blocktorealign * realign)2753 static void smoothing2( int njob, int nadd, int lenfull, char **seq, Blocktorealign *realign )
2754 {
2755 int i, j, norg = njob-nadd;
2756
2757 reporterr( "Smoothing2\n" );
2758 for( i=2; i<lenfull+1; i++ )
2759 {
2760 int postoshiftfrom;
2761 int shiftonadd;
2762 int postoshiftto;
2763 if( realign[i-2].nnewres && realign[i-1].nnewres == 0 && realign[i].nnewres )
2764 {
2765 postoshiftto = realign[i-2].start;
2766 postoshiftfrom = realign[i-2].end + 1;
2767 if( postoshiftfrom != realign[i].start -2 )
2768 {
2769 reporterr( "Unexpected pattern??? i=%d, realign[i-1].end=%d, realign[i].start=%d\n", i, realign[i-1].end, realign[i].start );
2770 exit( 1 );
2771 }
2772
2773 realign[i].nnewres += realign[i-2].nnewres;
2774 realign[i].start = realign[i-2].start+2;
2775 realign[i-2].nnewres = 0;
2776 realign[i-2].start = realign[i-2].end = 0;
2777
2778
2779 // reporterr( "SHIFT %d -> %d\n", postoshiftfrom, postoshiftto );
2780 for( j=0; j<norg; j++ )
2781 {
2782 if( seq[j][postoshiftto] != '-' )
2783 {
2784 reporterr( "Unexpected pattern 2. i=%d, postoshiftto=%d, postoshiftfrom=%d, seq[%d][%d]=%c\n", i, postoshiftto, postoshiftfrom, j, postoshiftto, seq[j][postoshiftto] );
2785 // reporterr( seq[j] );
2786 exit( 1 );
2787 }
2788 seq[j][postoshiftto] = seq[j][postoshiftfrom];
2789 seq[j][postoshiftfrom] = '-';
2790
2791 if( seq[j][postoshiftto+1] != '-' )
2792 {
2793 reporterr( "Unexpected pattern 3???\n" );
2794 exit( 1 );
2795 }
2796 seq[j][postoshiftto+1] = seq[j][postoshiftfrom+1];
2797 seq[j][postoshiftfrom+1] = '-';
2798 }
2799 for( j=norg; j<njob; j++ )
2800 {
2801 if( seq[j][postoshiftto] == '-' )
2802 {
2803 shiftonadd = searchlet( seq[j]+postoshiftto, seq[j]+postoshiftfrom );
2804 if( shiftonadd != -1 )
2805 {
2806 seq[j][postoshiftto] = seq[j][postoshiftto+shiftonadd];
2807 seq[j][postoshiftto+shiftonadd] = '-';
2808 }
2809 }
2810 if( seq[j][postoshiftto+1] == '-' )
2811 {
2812 shiftonadd = searchlet( seq[j]+postoshiftto+1, seq[j]+postoshiftfrom+1 );
2813 if( shiftonadd != -1 )
2814 {
2815 seq[j][postoshiftto+1] = seq[j][postoshiftto+1+shiftonadd];
2816 seq[j][postoshiftto+1+shiftonadd] = '-';
2817 }
2818 }
2819 }
2820 }
2821 }
2822 // for( i=0; i<lenfull+1; i++ ) fprintf( stderr, "i=%d, nnewres=%d, start=%d, end=%d\n", i, realign[i].nnewres, realign[i].start, realign[i].end );
2823 }
smoothing1(int njob,int nadd,int lenfull,char ** seq,Blocktorealign * realign)2824 static void smoothing1( int njob, int nadd, int lenfull, char **seq, Blocktorealign *realign )
2825 {
2826 int i, j, norg = njob-nadd;
2827
2828 reporterr( "Smoothing1\n" );
2829 for( i=1; i<lenfull+1; i++ )
2830 {
2831 int postoshiftfrom;
2832 int shiftonadd;
2833 int postoshiftto;
2834 if( realign[i-1].nnewres && realign[i].nnewres )
2835 {
2836 postoshiftto = realign[i-1].start;
2837 postoshiftfrom = realign[i-1].end + 1;
2838 if( postoshiftfrom != realign[i].start -1 )
2839 {
2840 reporterr( "Unexpected pattern??? i=%d, realign[i-1].end=%d, realign[i].start=%d\n", i, realign[i-1].end, realign[i].start );
2841 exit( 1 );
2842 }
2843
2844 realign[i].nnewres += realign[i-1].nnewres;
2845 realign[i].start = realign[i-1].start+1;
2846 realign[i-1].nnewres = 0;
2847 realign[i-1].start = realign[i-1].end = 0;
2848
2849
2850 // reporterr( "SHIFT %d -> %d\n", postoshiftfrom, postoshiftto );
2851 for( j=0; j<norg; j++ )
2852 {
2853 if( seq[j][postoshiftto] != '-' )
2854 {
2855 reporterr( "Unexpected pattern 2???\n" );
2856 exit( 1 );
2857 }
2858 seq[j][postoshiftto] = seq[j][postoshiftfrom];
2859 seq[j][postoshiftfrom] = '-';
2860 }
2861 for( j=norg; j<njob; j++ )
2862 {
2863 if( seq[j][postoshiftto] == '-' )
2864 {
2865 shiftonadd = searchlet( seq[j]+postoshiftto, seq[j]+postoshiftfrom );
2866 if( shiftonadd != -1 )
2867 {
2868 seq[j][postoshiftto] = seq[j][postoshiftto+shiftonadd];
2869 seq[j][postoshiftto+shiftonadd] = '-';
2870 }
2871 }
2872 }
2873 }
2874 }
2875 // for( i=0; i<lenfull+1; i++ ) fprintf( stderr, "i=%d, nnewres=%d, start=%d, end=%d\n", i, realign[i].nnewres, realign[i].start, realign[i].end );
2876 }
2877
main(int argc,char * argv[])2878 int main( int argc, char *argv[] )
2879 {
2880 static int *nlen;
2881 static char **name, **seq;
2882 static char **tmpseq;
2883 static char *tmpseq1;
2884 // static char *check1, *check2;
2885 static double **iscore, **iscore_kozo;
2886 static double *eff_kozo, *eff_kozo_mapped = NULL;
2887 int i, j, f, ien;
2888 int iadd;
2889 static int ***topol_kozo;
2890 Treedep *dep;
2891 static double **len_kozo;
2892 FILE *prep;
2893 FILE *infp;
2894 FILE *hat2p;
2895 int alignmentlength;
2896 char c;
2897 int alloclen, fullseqlen, tmplen;
2898 LocalHom **localhomtable = NULL;
2899 static char *kozoarivec;
2900 int nkozo;
2901 int njobc, norg, lenfull;
2902 int **newgaplist_o;
2903 int *newgaplist_compact;
2904 int **follower;
2905 int *follows;
2906 int *istherenewgap;
2907 int zure;
2908 int *posmap;
2909 int *ordertable;
2910 FILE *orderfp;
2911 int tmpseqlen;
2912 Blocktorealign *realign;
2913 RNApair ***singlerna;
2914 int ***topol;
2915 double **len;
2916 double **iscoreo, **nscore;
2917 FILE *fp;
2918 int **deletelist = NULL;
2919 char **addbk = NULL;
2920 char *originalgaps = NULL;
2921 Addtree *addtree;
2922
2923
2924 arguments( argc, argv );
2925 #ifndef enablemultithread
2926 nthread = 0;
2927 #endif
2928
2929
2930 if( fastathreshold < 0.0001 ) constraint = 0;
2931
2932 if( inputfile )
2933 {
2934 infp = fopen( inputfile, "r" );
2935 if( !infp )
2936 {
2937 fprintf( stderr, "Cannot open %s\n", inputfile );
2938 exit( 1 );
2939 }
2940 }
2941 else
2942 infp = stdin;
2943
2944 getnumlen( infp );
2945 rewind( infp );
2946
2947
2948 nkozo = 0;
2949
2950 if( njob < 2 )
2951 {
2952 fprintf( stderr, "At least 2 sequences should be input!\n"
2953 "Only %d sequence found.\n", njob );
2954 exit( 1 );
2955 }
2956
2957 norg = njob-nadd;
2958 njobc = norg+1;
2959 fprintf( stderr, "norg = %d\n", norg );
2960 fprintf( stderr, "njobc = %d\n", njobc );
2961 if( norg > 1000 || nadd > 1000 ) use_fft = 0;
2962
2963 fullseqlen = alloclen = nlenmax*4+1; //chuui!
2964 seq = AllocateCharMtx( njob, alloclen );
2965
2966 name = AllocateCharMtx( njob, B+1 );
2967 nlen = AllocateIntVec( njob );
2968
2969 ndeleted = 0;
2970
2971
2972 if( multidist || tuplesize > 0 )
2973 {
2974 iscore = AllocateFloatHalfMtx( norg );
2975 nscore = AllocateFloatMtx( norg, nadd );
2976 }
2977 else
2978 {
2979 iscore = AllocateFloatHalfMtx( njob );
2980 nscore = NULL;
2981 }
2982
2983 kozoarivec = AllocateCharVec( njob );
2984
2985
2986 ordertable = AllocateIntVec( norg+1 );
2987
2988
2989 if( constraint )
2990 {
2991 #if SMALLMEMORY
2992 if( multidist )
2993 {
2994 localhomtable = (LocalHom **)calloc( norg, sizeof( LocalHom *) );
2995 for( i=0; i<norg; i++)
2996 {
2997 localhomtable[i] = (LocalHom *)calloc( nadd, sizeof( LocalHom ) );
2998 for( j=0; j<nadd; j++)
2999 {
3000 localhomtable[i][j].start1 = -1;
3001 localhomtable[i][j].end1 = -1;
3002 localhomtable[i][j].start2 = -1;
3003 localhomtable[i][j].end2 = -1;
3004 localhomtable[i][j].overlapaa = -1.0;
3005 localhomtable[i][j].opt = -1.0;
3006 localhomtable[i][j].importance = -1.0;
3007 localhomtable[i][j].next = NULL;
3008 localhomtable[i][j].korh = 'h';
3009 }
3010 }
3011 // localhomtable = (LocalHom **)calloc( norg+nadd, sizeof( LocalHom *) );
3012 // for( i=norg; i<norg+nadd; i++) // hontou ha iranai
3013 // {
3014 // localhomtable[i] = (LocalHom *)calloc( norg, sizeof( LocalHom ) );
3015 // for( j=0; j<norg; j++)
3016 // {
3017 // localhomtable[i][j].start1 = -1;
3018 // localhomtable[i][j].end1 = -1;
3019 // localhomtable[i][j].start2 = -1;
3020 // localhomtable[i][j].end2 = -1;
3021 // localhomtable[i][j].overlapaa = -1.0;
3022 // localhomtable[i][j].opt = -1.0;
3023 // localhomtable[i][j].importance = -1.0;
3024 // localhomtable[i][j].next = NULL;
3025 // localhomtable[i][j].korh = 'h';
3026 // }
3027 // }
3028 }
3029 else
3030 #endif
3031 {
3032 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
3033 for( i=0; i<njob; i++)
3034 {
3035 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
3036 for( j=0; j<njob; j++)
3037 {
3038 localhomtable[i][j].start1 = -1;
3039 localhomtable[i][j].end1 = -1;
3040 localhomtable[i][j].start2 = -1;
3041 localhomtable[i][j].end2 = -1;
3042 localhomtable[i][j].overlapaa = -1.0;
3043 localhomtable[i][j].opt = -1.0;
3044 localhomtable[i][j].importance = -1.0;
3045 localhomtable[i][j].next = NULL;
3046 localhomtable[i][j].korh = 'h';
3047 }
3048 }
3049 }
3050
3051 fprintf( stderr, "Loading 'hat3' ... " );
3052 prep = fopen( "hat3", "r" );
3053 if( prep == NULL ) ErrorExit( "Make hat3." );
3054 #if SMALLMEMORY
3055 if( multidist )
3056 {
3057 // readlocalhomtable_two( prep, norg, nadd, localhomtable, localhomtable+norg, kozoarivec );
3058 readlocalhomtable_one( prep, norg, nadd, localhomtable, kozoarivec );
3059 }
3060 else
3061 #endif
3062 {
3063 readlocalhomtable( prep, njob, localhomtable, kozoarivec );
3064 }
3065
3066 fclose( prep );
3067 fprintf( stderr, "\ndone.\n" );
3068
3069
3070 nkozo = 0;
3071 for( i=0; i<njob; i++ )
3072 {
3073 // fprintf( stderr, "kozoarivec[%d] = %d\n", i, kozoarivec[i] );
3074 if( kozoarivec[i] ) nkozo++;
3075 }
3076 if( nkozo )
3077 {
3078 topol_kozo = AllocateIntCub( nkozo, 2, 0 );
3079 len_kozo = AllocateFloatMtx( nkozo, 2 );
3080 iscore_kozo = AllocateFloatHalfMtx( nkozo );
3081 eff_kozo = AllocateDoubleVec( nkozo );
3082 eff_kozo_mapped = AllocateDoubleVec( njob );
3083 }
3084
3085
3086 #if SMALLMEMORY
3087 // outlocalhom_part( localhomtable, norg, nadd );
3088 #else
3089 // outlocalhom( localhomtable, njob );
3090 #endif
3091
3092 #if 0
3093 fprintf( stderr, "Extending localhom ... " );
3094 extendlocalhom2( njob, localhomtable );
3095 fprintf( stderr, "done.\n" );
3096 #endif
3097 }
3098
3099 #if 0
3100 readData( infp, name, nlen, seq );
3101 #else
3102 readData_pointer( infp, name, nlen, seq );
3103 fclose( infp );
3104 #endif
3105
3106 constants( njob, seq );
3107
3108 #if 0
3109 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
3110 #endif
3111
3112 initSignalSM();
3113
3114 initFiles();
3115
3116 // WriteOptions( trap_g );
3117
3118 c = seqcheck( seq );
3119 if( c )
3120 {
3121 fprintf( stderr, "Illegal character %c\n", c );
3122 exit( 1 );
3123 }
3124
3125 alignmentlength = strlen( seq[0] );
3126 for( i=0; i<norg; i++ )
3127 {
3128 if( hitout == 0.0 && alignmentlength != strlen( seq[i] ) )
3129 {
3130 fprintf( stderr, "#################################################################################\n" );
3131 fprintf( stderr, "# ERROR! #\n" );
3132 fprintf( stderr, "# The original%4d sequences must be aligned #\n", njob-nadd );
3133 fprintf( stderr, "#################################################################################\n" );
3134 exit( 1 );
3135 }
3136 }
3137 if( addprofile )
3138 {
3139 fprintf( stderr, "Not supported!\n" );
3140 exit( 1 );
3141 }
3142
3143 if( tuplesize > 0 ) // if mtx is internally computed
3144 {
3145 if( multidist == 1 )
3146 {
3147 ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); // iscore ha muda.
3148
3149 // hat2p = fopen( "hat2-1", "w" );
3150 // WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore );
3151 // fclose( hat2p );
3152
3153 dndpre( norg, seq, iscore );
3154 // fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " );
3155 // prep = fopen( "hat2i", "r" );
3156 // if( prep == NULL ) ErrorExit( "Make hat2i." );
3157 // readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore );
3158 // fclose( prep );
3159 // fprintf( stderr, "done.\n" );
3160
3161 // hat2p = fopen( "hat2-2", "w" );
3162 // WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore );
3163 // fclose( hat2p );
3164 }
3165 else
3166 {
3167 ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore );
3168 }
3169 }
3170 else
3171 {
3172 if( multidist == 1 )
3173 {
3174 fprintf( stderr, "Loading 'hat2n' (aligned sequences - new sequences) ... " );
3175 prep = fopen( "hat2n", "r" );
3176 if( prep == NULL ) ErrorExit( "Make hat2n." );
3177 readhat2_doublehalf_part_pointer( prep, njob, nadd, name, nscore );
3178 fclose( prep );
3179 fprintf( stderr, "done.\n" );
3180
3181 fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " );
3182 prep = fopen( "hat2i", "r" );
3183 if( prep == NULL ) ErrorExit( "Make hat2i." );
3184 readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore );
3185 fclose( prep );
3186 fprintf( stderr, "done.\n" );
3187 }
3188 else
3189 {
3190 fprintf( stderr, "Loading 'hat2' ... " );
3191 prep = fopen( "hat2", "r" );
3192 if( prep == NULL ) ErrorExit( "Make hat2." );
3193 readhat2_doublehalf_pointer( prep, njob, name, iscore );
3194 fclose( prep );
3195 fprintf( stderr, "done.\n" );
3196 }
3197 }
3198
3199 #if 1
3200 if( distout )
3201 {
3202 fprintf( stderr, "Writing distances between new sequences and existing msa.\n" );
3203 hat2p = fopen( "hat2", "w" );
3204 if( multidist || tuplesize > 0 )
3205 {
3206 for( iadd=0; iadd<nadd; iadd++ )
3207 {
3208 fprintf( hat2p, "Distance between new sequence %d and %d sequences in existing msa\n", iadd+1, norg );
3209 for( i=0; i<norg; i++ )
3210 {
3211 fprintf( hat2p, "%5.3f ", nscore[i][iadd] );
3212 if( (i+1) % 12 == 0 ) fprintf( hat2p, "\n" );
3213 }
3214 fprintf( hat2p, "\n\n" );
3215 }
3216 }
3217 else
3218 {
3219 for( iadd=0; iadd<nadd; iadd++ )
3220 {
3221 fprintf( hat2p, "Distance between new sequence %d and %d sequences in existing msa\n", iadd+1, norg );
3222 for( i=0; i<norg; i++ )
3223 {
3224 fprintf( hat2p, "%5.3f ", iscore[i][norg+iadd-i] );
3225 if( (i+1) % 12 == 0 ) fprintf( hat2p, "\n" );
3226 }
3227 fprintf( hat2p, "\n\n" );
3228 }
3229 }
3230 fclose( hat2p );
3231 // exit( 1 );
3232 // hat2p = fopen( "hat2", "w" );
3233 // WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore );
3234 // fclose( hat2p );
3235 // exit( 1 );
3236 }
3237 #endif
3238
3239
3240 singlerna = NULL;
3241
3242
3243 if( keeplength )
3244 {
3245 lenfull = strlen( seq[0] );
3246 originalgaps = (char *)calloc( lenfull+1, sizeof( char) );
3247 recordoriginalgaps( originalgaps, norg, seq );
3248
3249
3250 deletelist = (int **)calloc( nadd+1, sizeof( int * ) );
3251 for( i=0; i<nadd; i++ )
3252 {
3253 deletelist[i] = calloc( 1, sizeof( int ) );
3254 deletelist[i][0] = -1;
3255 }
3256 deletelist[nadd] = NULL;
3257
3258 }
3259 else
3260 {
3261 originalgaps = NULL;
3262 deletelist = NULL;
3263 }
3264
3265 commongappick( norg, seq );
3266 lenfull = strlen( seq[0] );
3267
3268 if( keeplength && mapout )
3269 {
3270 addbk = (char **)calloc( nadd+1, sizeof( char * ) );
3271 for( i=0; i<nadd; i++ )
3272 {
3273 ien = strlen( seq[norg+i] );
3274 addbk[i] = (char *)calloc( ien + 1, sizeof( char ) );
3275 gappick0( addbk[i], seq[norg+i] );
3276 }
3277 addbk[nadd] = NULL;
3278 }
3279 else
3280 {
3281 addbk = NULL;
3282 }
3283
3284
3285
3286 // newgaplist_o = AllocateIntMtx( nadd, alloclen ); //ookisugi
3287 newgaplist_o = AllocateIntMtx( nadd, lenfull*2 );
3288 newgaplist_compact = AllocateIntVec( lenfull*2 );
3289 istherenewgap = AllocateIntVec( nadd );
3290 follower = AllocateIntMtx( norg, 1 );
3291 for( i=0; i<norg; i++ ) follower[i][0] = -1;
3292 follows = AllocateIntVec( nadd );
3293
3294 dep = (Treedep *)calloc( norg, sizeof( Treedep ) );
3295 topol = AllocateIntCub( norg, 2, 0 );
3296 len = AllocateFloatMtx( norg, 2 );
3297 // iscoreo = AllocateFloatHalfMtx( norg );
3298 mtxcpy( norg, norg, &iscoreo, iscore );
3299
3300 if( treeout )
3301 {
3302 addtree = (Addtree *)calloc( nadd, sizeof( Addtree ) );
3303 if( !addtree )
3304 {
3305 fprintf( stderr, "Cannot allocate addtree\n" );
3306 exit( 1 );
3307 }
3308 }
3309
3310
3311 // nlim = norg-1;
3312 // for( i=0; i<nlim; i++ )
3313 // {
3314 // fptc = iscoreo[i]+1;
3315 // fpt = iscore[i]+1;
3316 // j = norg-i-1;
3317 // while( j-- )
3318 // *fptc++ = *fpt++;
3319 //// for( j=i+1; j<norg; j++ )
3320 //// iscoreo[i][j-i] = iscore[i][j-i];
3321 // }
3322
3323 // fprintf( stderr, "building a tree.." );
3324 if( treein )
3325 {
3326 reporterr( "Loading a tree ... " );
3327 loadtop( norg, iscoreo, topol, len, name, NULL, dep ); // nogaplen?
3328 reporterr( "\ndone.\n\n" );
3329 }
3330 else if( treeout )
3331 fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( norg, iscoreo, topol, len, name, nlen, dep, 1 );
3332 else
3333 fixed_musclesupg_double_realloc_nobk_halfmtx( norg, iscoreo, topol, len, dep, 0, 1 );
3334 // fprintf( stderr, "done.\n" );
3335
3336 if( norg > 1 )
3337 cnctintvec( ordertable, topol[norg-2][0], topol[norg-2][1] );
3338 else
3339 {
3340 ordertable[0] = 0; ordertable[1] = -1;
3341 }
3342 FreeFloatHalfMtx( iscoreo, norg );
3343
3344 #ifdef enablemultithread
3345 if( nthread )
3346 {
3347 pthread_t *handle;
3348 pthread_mutex_t mutex_counter;
3349 thread_arg_t *targ;
3350 int *iaddsharept;
3351
3352 targ = calloc( nthread, sizeof( thread_arg_t ) );
3353 handle = calloc( nthread, sizeof( pthread_t ) );
3354 pthread_mutex_init( &mutex_counter, NULL );
3355 iaddsharept = calloc( 1, sizeof(int) );
3356 *iaddsharept = 0;
3357
3358 for( i=0; i<nthread; i++ )
3359 {
3360 targ[i].thread_no = i;
3361 targ[i].follows = follows;
3362 targ[i].njob = njob;
3363 targ[i].nadd = nadd;
3364 targ[i].nlen = nlen;
3365 targ[i].name = name;
3366 targ[i].seq = seq;
3367 targ[i].localhomtable = localhomtable;
3368 targ[i].iscore = iscore;
3369 targ[i].nscore = nscore;
3370 targ[i].istherenewgap = istherenewgap;
3371 targ[i].newgaplist = newgaplist_o;
3372 targ[i].singlerna = singlerna;
3373 targ[i].eff_kozo_mapped = eff_kozo_mapped;
3374 targ[i].alloclen = alloclen;
3375 targ[i].iaddshare = iaddsharept;
3376 targ[i].dep = dep;
3377 targ[i].topol = topol;
3378 targ[i].len = len;
3379 targ[i].addtree = addtree;
3380 targ[i].deletelist = deletelist;
3381 targ[i].mutex_counter = &mutex_counter;
3382 pthread_create( handle+i, NULL, addsinglethread, (void *)(targ+i) );
3383 }
3384 for( i=0; i<nthread; i++ )
3385 {
3386 pthread_join( handle[i], NULL );
3387 }
3388 pthread_mutex_destroy( &mutex_counter );
3389 free( handle );
3390 free( targ );
3391 free( iaddsharept );
3392 }
3393 else
3394 #endif
3395 {
3396 thread_arg_t *targ;
3397 targ = calloc( 1, sizeof( thread_arg_t ) );
3398 targ[0].follows = follows;
3399 targ[0].njob = njob;
3400 targ[0].nadd = nadd;
3401 targ[0].nlen = nlen;
3402 targ[0].name = name;
3403 targ[0].seq = seq;
3404 targ[0].localhomtable = localhomtable;
3405 targ[0].iscore = iscore;
3406 targ[0].nscore = nscore;
3407 targ[0].istherenewgap = istherenewgap;
3408 targ[0].newgaplist = newgaplist_o;
3409 targ[0].singlerna = singlerna;
3410 targ[0].eff_kozo_mapped = eff_kozo_mapped;
3411 targ[0].alloclen = alloclen;
3412 targ[0].dep = dep;
3413 targ[0].topol = topol;
3414 targ[0].len = len;
3415 targ[0].addtree = addtree;
3416 targ[0].deletelist = deletelist;
3417 addsinglethread( targ );
3418 free( targ );
3419 }
3420 free( dep );
3421 FreeFloatMtx( len );
3422 if( multidist || tuplesize > 0 ) FreeFloatMtx( nscore );
3423
3424
3425 // for( i=0; i<nadd; i++ ) fprintf( stdout, ">%s (%d) \n%s\n", name[norg+i], norg+i, seq[norg+i] );
3426
3427 if( treeout )
3428 {
3429 fp = fopen( "infile.tree", "a" );
3430 if( fp == 0 )
3431 {
3432 fprintf( stderr, "File error!\n" );
3433 exit( 1 );
3434 }
3435 for( i=0; i<nadd; i++ )
3436 {
3437 fprintf( fp, "\n" );
3438 fprintf( fp, "%8d: %s\n", norg+i+1, name[norg+i]+1 );
3439 fprintf( fp, " nearest sequence: %d\n", addtree[i].nearest + 1 );
3440 fprintf( fp, " approximate distance: %f\n", addtree[i].dist1 );
3441 fprintf( fp, " sister group: %s\n", addtree[i].neighbors );
3442 fprintf( fp, " approximate distance: %f\n", addtree[i].dist2 );
3443 free( addtree[i].neighbors );
3444 }
3445 fclose( fp );
3446 free( addtree );
3447 }
3448
3449 for( iadd=0; iadd<nadd; iadd++ )
3450 {
3451 f = follows[iadd];
3452 for( i=0; follower[f][i]!=-1; i++ )
3453 ;
3454 if( !(follower[f] = realloc( follower[f], (i+2)*sizeof(int) ) ) )
3455 {
3456 fprintf( stderr, "Cannot reallocate follower[]" );
3457 exit( 1 );
3458 }
3459 follower[f][i] = iadd;
3460 follower[f][i+1] = -1;
3461 #if 0
3462 fprintf( stderr, "\nfollowers of %d = ", f );
3463 for( i=0; follower[f][i]!=-1; i++ )
3464 fprintf( stderr, "%d ", follower[f][i] );
3465 fprintf( stderr, "\n" );
3466 #endif
3467 }
3468
3469 orderfp = fopen( "order", "w" );
3470 if( !orderfp )
3471 {
3472 fprintf( stderr, "Cannot open 'order'\n" );
3473 exit( 1 );
3474 }
3475 for( i=0; ordertable[i]!=-1; i++ )
3476 {
3477 fprintf( orderfp, "%d\n", ordertable[i] );
3478 // for( j=0; follower[i][j]!=-1; j++ )
3479 // fprintf( orderfp, "%d\n", follower[i][j]+norg );
3480 for( j=0; follower[ordertable[i]][j]!=-1; j++ )
3481 fprintf( orderfp, "%d\n", follower[ordertable[i]][j]+norg );
3482 // fprintf( orderfp, "%d -> %d\n", follower[i][j]+norg, i );
3483 }
3484 fclose( orderfp );
3485
3486 posmap = AllocateIntVec( lenfull+2 );
3487 realign = calloc( lenfull+2, sizeof( Blocktorealign ) );
3488 for( i=0; i<lenfull+1; i++ ) posmap[i] = i;
3489 for( i=0; i<lenfull+1; i++ )
3490 {
3491 realign[i].nnewres = 0;
3492 realign[i].start = 0;
3493 realign[i].end = 0;
3494 }
3495
3496 fprintf( stderr, "\n\nCombining ..\n" );
3497 fflush( stderr );
3498 tmpseqlen = alloclen * 100;
3499 tmpseq = AllocateCharMtx( 1, tmpseqlen );
3500
3501
3502 // check1 = AllocateCharVec( tmpseqlen );
3503 // check2 = AllocateCharVec( tmpseqlen );
3504 // gappick0( check2, seq[0] );
3505 for( iadd=0; iadd<nadd; iadd++ )
3506 {
3507 // fprintf( stderr, "%d / %d\r", iadd, nadd );
3508 fflush( stderr );
3509
3510 // fprintf( stderr, "\niadd == %d\n", iadd );
3511 makegaplistcompact( lenfull, posmap, newgaplist_compact, newgaplist_o[iadd] );
3512 if( iadd == 0 || istherenewgap[iadd] )
3513 {
3514 tmpseq1 = tmpseq[0];
3515 // gaplist2alnx( lenfull, tmpseq1, seq[0], newgaplist_o[iadd], posmap, tmpseqlen );
3516 gaplist2alnx( lenfull, tmpseq1, seq[0], newgaplist_compact, posmap, tmpseqlen );
3517 // fprintf( stderr, "len = %d ? %d\n", strlen( tmpseq1 ), alloclen );
3518 if( ( tmplen = strlen( tmpseq1 ) ) >= fullseqlen )
3519 {
3520 fullseqlen = tmplen * 2+1;
3521 // fprintf( stderr, "Length over!\n" );
3522 // fprintf( stderr, "strlen(tmpseq1)=%d\n", (int)strlen( tmpseq1 ) );
3523 fprintf( stderr, "reallocating..." );
3524 // fprintf( stderr, "alloclen=%d\n", alloclen );
3525 // fprintf( stderr, "Please recompile!\n" );
3526 // exit( 1 );
3527 for( i=0; i<njob; i++ )
3528 {
3529 seq[i] = realloc( seq[i], fullseqlen * sizeof( char ) );
3530 if( !seq[i] )
3531 {
3532 fprintf( stderr, "Cannot reallocate seq[][]\n" );
3533 exit( 1 );
3534 }
3535 }
3536 fprintf( stderr, "done.\n" );
3537 }
3538 strcpy( seq[0], tmpseq1 );
3539
3540 ien = norg+iadd;
3541 #ifdef enablemultithread
3542 if( nthread > 0 && ien > 500 )
3543 {
3544 gaplist2alnxthread_arg_t *targ;
3545 int jobpos;
3546 pthread_t *handle;
3547 pthread_mutex_t mutex;
3548 fprintf( stderr, "%d / %d (threads %d-%d)\r", iadd, nadd, 0, nthread );
3549
3550 targ = calloc( nthread, sizeof( gaplist2alnxthread_arg_t ) );
3551 handle = calloc( nthread, sizeof( pthread_t ) );
3552 pthread_mutex_init( &mutex, NULL );
3553 jobpos = 1;
3554 for( i=0; i<nthread; i++ )
3555 {
3556 // targ[i].thread_no = i;
3557 targ[i].ncycle = ien;
3558 targ[i].jobpospt = &jobpos;
3559 targ[i].tmpseqlen = tmpseqlen;
3560 targ[i].lenfull = lenfull;
3561 targ[i].seq = seq;
3562 // targ[i].newgaplist = newgaplist_o[iadd];
3563 targ[i].newgaplist = newgaplist_compact;
3564 targ[i].posmap = posmap;
3565 targ[i].mutex = &mutex;
3566
3567 pthread_create( handle+i, NULL, gaplist2alnxthread, (void *)(targ+i) );
3568 }
3569 for( i=0; i<nthread; i++ )
3570 {
3571 pthread_join( handle[i], NULL );
3572 }
3573 pthread_mutex_destroy( &mutex );
3574 free( handle );
3575 free( targ );
3576 }
3577 else
3578 #endif
3579 {
3580 fprintf( stderr, "%d / %d\r", iadd, nadd );
3581 for( i=1; i<ien; i++ )
3582 {
3583 tmpseq1 = tmpseq[0];
3584 if( i == 1 ) fprintf( stderr, " %d / %d\r", iadd, nadd );
3585 // gaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist_o[iadd], posmap, tmpseqlen );
3586 gaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist_compact, posmap, tmpseqlen );
3587 // fprintf( stderr, ">%s (iadd=%d)\n%s\n", name[i], iadd, tmpseq1 );
3588 strcpy( seq[i], tmpseq1 );
3589 }
3590 }
3591 }
3592 tmpseq1 = tmpseq[0];
3593 // insertgapsbyotherfragments_simple( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap );
3594 insertgapsbyotherfragments_compact( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap );
3595 // fprintf( stderr, "%d = %s\n", iadd, tmpseq1 );
3596 eq2dash( tmpseq1 );
3597 strcpy( seq[norg+iadd], tmpseq1 );
3598
3599 // adjustposmap( lenfull, posmap, newgaplist_o[iadd] );
3600 adjustposmap( lenfull, posmap, newgaplist_compact );
3601 countnewres( lenfull, realign, posmap, newgaplist_o[iadd] ); // muda?
3602 // countnewres( lenfull, realign, posmap, newgaplist_compact ); // muda?
3603
3604 }
3605 fprintf( stderr, "\r done. \n\n" );
3606
3607 #if 0
3608 for( i=0; i<njob; i++ )
3609 {
3610 fprintf( stderr, ">%s\n", name[i] );
3611 fprintf( stderr, "%s\n", seq[i] );
3612 }
3613 #endif
3614
3615 #if 0
3616 fprintf( stderr, "realign[].nnewres = " );
3617 for( i=0; i<lenfull+1; i++ )
3618 {
3619 fprintf( stderr, "%d ", realign[i].nnewres );
3620 }
3621 fprintf( stderr, "\n" );
3622 for( i=0; i<lenfull+1; i++ ) fprintf( stderr, "i=%d, nnewres=%d, start=%d, end=%d\n", i, realign[i].nnewres, realign[i].start, realign[i].end );
3623 #endif
3624
3625
3626 if( smoothing )
3627 {
3628 // for( i=0; i<lenfull+1; i++ ) fprintf( stderr, "i=%d, nnewres=%d, start=%d, end=%d\n", i, realign[i].nnewres, realign[i].start, realign[i].end );
3629 smoothing1( njob, nadd, lenfull, seq, realign );
3630 // for( i=0; i<lenfull+1; i++ ) fprintf( stderr, "i=%d, nnewres=%d, start=%d, end=%d\n", i, realign[i].nnewres, realign[i].start, realign[i].end );
3631 smoothing2( njob, nadd, lenfull, seq, realign );
3632 }
3633
3634 for( i=0; i<lenfull+1; i++ )
3635 {
3636 if( realign[i].nnewres > 1 )
3637 {
3638 // fprintf( stderr, "i=%d: %d-%d\n", i, realign[i].start, realign[i].end );
3639 fprintf( stderr, "\rRealigning %d/%d \r", i, lenfull );
3640 // zure = dorealignment_compact( realign+i, seq, &fullseqlen, norg );
3641 // zure = dorealignment_order( realign+i, seq, &fullseqlen, norg, ordertable, follows );
3642 zure = dorealignment_tree( realign+i, seq, &fullseqlen, norg, topol, follows );
3643 #if 0
3644 gappick0( check1, seq[0] );
3645 fprintf( stderr, "check1 = %s\n", check1 );
3646 if( strcmp( check1, check2 ) )
3647 {
3648 fprintf( stderr, "CHANGED!!!!!\n" );
3649 exit( 1 );
3650 }
3651 #endif
3652 for( j=i+1; j<lenfull+1; j++ )
3653 {
3654 if( realign[j].nnewres )
3655 {
3656 realign[j].start -= zure;
3657 realign[j].end -= zure;
3658 }
3659 }
3660 }
3661 }
3662 FreeIntCub( topol );
3663 fprintf( stderr, "\r done. \n\n" );
3664 fflush( stderr );
3665
3666
3667 if( keeplength )
3668 {
3669 FILE *dlf;
3670 restoreoriginalgaps( njob, seq, originalgaps );
3671
3672 dlf = fopen( "_deletelist", "w" );
3673 for( i=0; i<nadd; i++ )
3674 {
3675 if( deletelist[i] )
3676 for( j=0; deletelist[i][j]!=-1; j++ )
3677 fprintf( dlf, "%d %d\n", njob-nadd+i, deletelist[i][j] ); // 0origin
3678 }
3679 fclose( dlf );
3680
3681 if( mapout )
3682 {
3683 dlf = fopen( "_deletemap", "w" );
3684 reconstructdeletemap( nadd, addbk, deletelist, seq+njob-nadd, dlf, name+njob-nadd );
3685 fclose( dlf );
3686 }
3687 }
3688
3689
3690
3691 FreeIntMtx( newgaplist_o );
3692 FreeIntVec( newgaplist_compact );
3693 FreeIntVec( posmap );
3694 free( realign );
3695 free( istherenewgap );
3696 FreeIntMtx( follower );
3697 free( follows );
3698 free( ordertable );
3699 free( kozoarivec );
3700 free( nlen );
3701 FreeCharMtx( tmpseq );
3702 freeconstants();
3703 if( addbk ) FreeCharMtx( addbk ); addbk = NULL;
3704 if( deletelist ) FreeIntMtx( deletelist ); deletelist = NULL;
3705 if( originalgaps ) free( originalgaps ); originalgaps = NULL;
3706
3707 writeData_pointer( prep_g, njob, name, nlen, seq );
3708 #if 0
3709 writeData( stdout, njob, name, nlen, bseq );
3710 writePre( njob, name, nlen, bseq, !contin );
3711 writeData_pointer( prep_g, njob, name, nlen, aseq );
3712 #endif
3713 #if IODEBUG
3714 fprintf( stderr, "OSHIMAI\n" );
3715 #endif
3716
3717 #if SMALLMEMORY
3718 if( multidist )
3719 {
3720 // if( constraint ) FreeLocalHomTable_two( localhomtable, norg, nadd );
3721 if( constraint ) FreeLocalHomTable_one( localhomtable, norg, nadd );
3722 }
3723 else
3724 #endif
3725 {
3726 if( constraint ) FreeLocalHomTable( localhomtable, njob );
3727 }
3728
3729 SHOWVERSION;
3730 if( ndeleted > 0 )
3731 {
3732 reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted );
3733 if( mapout )
3734 reporterr( "The deleted letters are shown in the (filename).map file.\n" );
3735 else
3736 reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" );
3737 }
3738 return( 0 );
3739 }
3740