1 #include "mltaln.h"
2
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 0
6
7 static int usecache;
8 static char *whereispairalign;
9 static char *odir;
10 static char *pdir;
11 static double scale;
12 static int *alreadyoutput;
13 static int equivthreshold;
14 static int equivwinsize;
15 static int equivshortestlen;
16
cutpath(char * s)17 static void cutpath( char *s )
18 {
19 char *pos;
20 pos = s + strlen( s );
21
22 while( --pos >= s )
23 {
24 if( *pos == '/' ) break;
25 }
26
27 strcpy( s, pos+1 );
28 }
29
getchainid(char * s)30 static char getchainid( char *s )
31 {
32 s += strlen( s ) - 2;
33 if( isspace( s[0] ) && isalnum( s[1] ) )
34 return( s[1] );
35 else
36 return( 'A' );
37 }
38
extractfirstword(char * s)39 static void extractfirstword( char *s )
40 {
41 while( *s )
42 {
43 if( isspace( *s ) ) break;
44 s++;
45 }
46 *s = 0;
47 }
48
strip(char * s)49 static char *strip( char *s )
50 {
51 char *v;
52
53 while( *s )
54 {
55 if( !isspace( *s ) ) break;
56 s++;
57 }
58 v = s;
59
60 s += strlen( v ) - 1;
61 while( s>=v )
62 {
63 if( !isspace( *s ) )
64 {
65 *(s+1) = 0;
66 break;
67 }
68 s--;
69 }
70
71 return( v );
72 }
73
74 #if 0
75 static void makeequivdouble( double *d, char *c )
76 {
77 while( *c )
78 {
79 *d++ = (double)( *c++ - '0' );
80 }
81 }
82
83 static void maskequiv( double *d, int n )
84 {
85 int halfwin;
86 int ok;
87 int i, j;
88
89 halfwin = (int)( equivwinsize / 2 );
90
91 for( i=0; i<n; i++ )
92 {
93 ok = 1;
94 for( j = i-halfwin; j<i+halfwin; j++ )
95 {
96 if( j<0 || n=<j ) continue;
97 if( d[j] <= 0.0 )
98 {
99 ok = 0;
100 break;
101 }
102 }
103 if( ok == 0 ) d[i] = 0.0;
104 }
105 }
106 #else
maskequiv(double * d,int n)107 static void maskequiv( double *d, int n )
108 {
109 int i, len;
110 int count;
111 len = 0;
112 double *dbk, *dori, *dbkori;
113
114 dbk = calloc( n, sizeof( double ) );
115
116 dbkori = dbk;
117 dori = d;
118 count = n;
119 while( count-- )
120 {
121 *dbk++ = *d++;
122 }
123
124 dbk = dbkori;
125 d = dori;
126 len = 0;
127
128
129 for( i=0; i<n; i++ )
130 {
131 if( d[i] > 0.0 )
132 {
133 len += 1;
134 d[i] = 0.0;
135 }
136 else
137 {
138 d[i] = 0.0;
139 if( len >= equivshortestlen )
140 {
141 len++;
142 while( len-- ) d[i-len] = dbk[i-len];
143 }
144 len = 0;
145 }
146 }
147
148 if( len >= equivshortestlen )
149 {
150 len++;
151 while( len-- ) d[n-len] = dbk[n-len];
152 }
153
154 free( dbk );
155 }
156 #endif
157
makeequivdouble_tmalign(double * d,char * c,int n)158 static void makeequivdouble_tmalign( double *d, char *c, int n )
159 {
160 double tmpd;
161 double *dbk;
162 int tmpi;
163 char s;
164 dbk = d;
165 while( *c )
166 {
167 if( ( s=*c++ ) == ':' )
168 tmpi = 9;
169 else if( s == '.' )
170 tmpi = 4;
171 else
172 tmpi = 0;
173 // tmpd = (double)( tmpi + 1 - equivthreshold ) / ( 10 - equivthreshold ) * 9.0;
174 // if( tmpd < 0.0 ) tmpd = 0.0;
175 tmpd = (double)( tmpi );
176 // *d++ = (int)tmpd;
177 *d++ = tmpd;
178 }
179
180 d = dbk;
181 // maskequiv( d, n );
182 }
183
makeequivdouble_threshold(double * d,char * c,int n)184 static void makeequivdouble_threshold( double *d, char *c, int n )
185 {
186 double tmpd;
187 double *dbk;
188 int tmpi;
189 dbk = d;
190 while( *c )
191 {
192 tmpi = (int)( *c++ - '0' );
193 tmpd = (double)( tmpi + 1 - equivthreshold ) / ( 10 - equivthreshold ) * 9.0;
194 if( tmpd < 0.0 ) tmpd = 0.0;
195 // *d++ = (int)tmpd;
196 *d++ = tmpd;
197 }
198
199 d = dbk;
200 maskequiv( d, n );
201 }
202
readtmalign(FILE * fp,char * seq1,char * seq2,double * equiv)203 static void readtmalign( FILE *fp, char *seq1, char *seq2, double *equiv )
204 {
205 static char *line = NULL;
206 static char *equivchar = NULL;
207 int n;
208
209
210 if( equivchar == NULL )
211 {
212 equivchar = calloc( nlenmax * 2 + 1, sizeof( char ) );
213 line = calloc( nlenmax * 2 + 1, sizeof( char ) );
214 }
215 seq1[0] = 0;
216 seq2[0] = 0;
217 equivchar[0] = 0;
218
219
220 // system( "vi _tmalignout" );
221 while( 1 )
222 {
223 if( feof( fp ) )
224 {
225 fprintf( stderr, "Error in TMalign\n" );
226 exit( 1 );
227 }
228 fgets( line, 999, fp );
229 // fprintf( stdout, "line = :%s:\n", line );
230 if( !strncmp( line+5, "denotes the residue pairs", 20 ) ) break;
231 }
232 fgets( line, nlenmax*2, fp );
233 strcat( seq1, strip( line ) );
234
235 fgets( line, nlenmax*2, fp );
236 strcat( equivchar, strip( line ) );
237
238 fgets( line, nlenmax*2, fp );
239 strcat( seq2, strip( line ) );
240
241 #if 0
242 printf( "seq1=%s\n", seq1 );
243 printf( "seq2=%s\n", seq2 );
244 printf( "equi=%s\n", equivchar );
245 exit( 1 );
246 #endif
247 n = strlen( seq1 );
248 makeequivdouble_tmalign( equiv, equivchar, n );
249
250 #if 0
251 fprintf( stdout, "\n" );
252 for( i=0; i<n; i++ )
253 {
254 fprintf( stdout, "%1.0f", equiv[i] );
255 }
256 fprintf( stdout, "\n" );
257 exit( 1 );
258 #endif
259 }
readrash(FILE * fp,char * seq1,char * seq2,double * equiv)260 static void readrash( FILE *fp, char *seq1, char *seq2, double *equiv )
261 {
262 char line[1000];
263 static char *equivchar = NULL;
264 int n;
265
266
267 if( equivchar == NULL )
268 {
269 equivchar = calloc( nlenmax * 2, sizeof( char ) );
270 }
271 seq1[0] = 0;
272 seq2[0] = 0;
273 equivchar[0] = 0;
274
275 while( 1 )
276 {
277 fgets( line, 999, fp );
278 // fprintf( stdout, "line = :%s:\n", line );
279 if( !strncmp( line, "Query ", 6 ) ) break;
280 if( feof( fp ) ) break;
281 if( !strncmp( line, "QUERY ", 6 ) )
282 {
283 strcat( seq1, strip( line+5 ) );
284 fgets( line, 999, fp );
285 }
286 if( !strncmp( line, "TEMPL ", 6 ) )
287 {
288 strcat( seq2, strip( line+5 ) );
289 fgets( line, 999, fp );
290 }
291 if( !strncmp( line, "Equiva", 6 ) )
292 {
293 strcat( equivchar, strip( line+11 ) );
294 fgets( line, 999, fp );
295 }
296 }
297 #if 0
298 printf( "seq1=:%s:\n", seq1 );
299 printf( "seq2=:%s:\n", seq2 );
300 printf( "equi=:%s:\n", equivchar );
301 exit( 1 );
302 #endif
303 n = strlen( seq1 );
304 makeequivdouble_threshold( equiv, equivchar, n );
305
306 #if 0
307 fprintf( stdout, "\n" );
308 for( i=0; i<n; i++ )
309 {
310 fprintf( stdout, "%1.0f", equiv[i] );
311 }
312 fprintf( stdout, "\n" );
313 #endif
314 }
315
checkcbeta(FILE * fp)316 static int checkcbeta( FILE *fp )
317 {
318 char linec[1000];
319 while( 1 )
320 {
321 fgets( linec, 999, fp );
322 if( feof( fp ) ) break;
323 if( !strncmp( "ATOM ", linec, 5 ) )
324 {
325 if( !strncmp( "CB ", linec+13, 3 ) ) return( 0 );
326 }
327 }
328 return( 1 );
329 }
330
331
calltmalign(char ** mseq1,char ** mseq2,double * equiv,char * fname1,char * chain1,char * fname2,char * chain2,int alloclen)332 static double calltmalign( char **mseq1, char **mseq2, double *equiv, char *fname1, char *chain1, char *fname2, char *chain2, int alloclen )
333 {
334 FILE *fp;
335 int res;
336 static char com[10000];
337 double value;
338 char cachedir[10000];
339 char cachefile[10000];
340 int runnow;
341
342
343 if( usecache )
344 {
345 sprintf( cachedir, "%s/.tmalignoutcache", getenv( "HOME" ) );
346 sprintf( com, "mkdir -p %s", cachedir );
347 system( com );
348
349 sprintf( cachefile, "%s/%s%s-%s%s", cachedir, fname1, chain1, fname2, chain2 );
350
351 runnow = 0;
352 fp = fopen( cachefile, "r" );
353 if( fp == NULL ) runnow = 1;
354 else
355 {
356 fgets( com, 100, fp );
357 if( strncmp( com, "successful", 10 ) ) runnow = 1;
358 fclose( fp );
359 }
360 }
361 else
362 {
363 runnow = 1;
364 }
365
366 if( runnow )
367 {
368 #if 0
369 sprintf( com, "ln -s %s %s.pdb 2>_dum", fname1, fname1 );
370 res = system( com );
371 sprintf( com, "ln -s %s %s.pdb 2>_dum", fname2, fname2 );
372 res = system( com );
373 #endif
374 sprintf( com, "\"%s/TMalign\" %s.pdb %s.pdb > _tmalignout 2>_dum", whereispairalign, fname1, fname2 );
375 fprintf( stderr, "command = %s\n", com );
376 res = system( com );
377 if( res )
378 {
379 fprintf( stderr, "Error in TMalign\n" );
380 exit( 1 );
381 }
382
383 }
384 else
385 {
386 fprintf( stderr, "Cache is not supported!\n" );
387 exit( 1 );
388 }
389
390 fp = fopen( "_tmalignout", "r" );
391 if( !fp )
392 {
393 fprintf( stderr, "Cannot open _tmalignout\n" );
394 exit( 1 );
395 }
396
397 readtmalign( fp, *mseq1, *mseq2, equiv );
398
399 fclose( fp );
400
401 // fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
402 // fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
403
404 value = (double)naivepairscore11( *mseq1, *mseq2, penalty );
405
406 return( value );
407 }
408
callrash(int mem1,int mem2,char ** mseq1,char ** mseq2,double * equiv,char * fname1,char * fname2,int alloclen)409 static double callrash( int mem1, int mem2, char **mseq1, char **mseq2, double *equiv, char *fname1, char *fname2, int alloclen )
410 {
411 FILE *fp;
412 // int res;
413 static char com[10000];
414 double value;
415 char cachedir[10000];
416 char cachefile[10000];
417 int runnow;
418 char pairid[1000];
419
420 sprintf( pairid, "%d-%d", mem1, mem2 );
421 // fprintf( stderr, "pairid = %s\n", pairid );
422
423 if( usecache )
424 {
425 // sprintf( cachedir, "tmp" );
426 sprintf( cachedir, "%s", pdir );
427
428 sprintf( cachefile, "%s/%s.%s.rash", cachedir, fname1, fname2 );
429
430 runnow = 0;
431 fp = fopen( cachefile, "r" );
432 if( fp == NULL )
433 {
434 fprintf( stderr, "Cannot open %s\n", cachefile );
435 exit( 1 );
436 }
437 else
438 {
439 fclose( fp );
440 }
441 }
442 else
443 {
444 fprintf( stderr, "Not supported!\n" );
445 exit( 1 );
446 }
447
448 #if 0
449 if( 0 )
450 {
451 #if 0
452 sprintf( com, "ln -s %s %s.pdb 2>_dum", fname1, fname1 );
453 res = system( com );
454 sprintf( com, "ln -s %s %s.pdb 2>_dum", fname2, fname2 );
455 res = system( com );
456 #endif
457 #if 0 // 091127, pdp nai!
458 sprintf( com, "env PATH=%s PDP_ASH.pl --qf %s.pdb --qc %s --tf %s.pdb --tc %s > _rashout 2>_dum", whereispairalign, fname1, chain1, fname2, chain2 );
459 #else
460 sprintf( com, "\"%s/rash\" --qf %s.pdb --qc %s --tf %s.pdb --tc %s --of %s.pdbpair > %s.rashout 2>%s.dum", whereispairalign, fname1, chain1, fname2, chain2, pairid, pairid, pairid );
461 #endif
462 fprintf( stderr, "command = %s\n", com );
463 res = system( com );
464 if( res )
465 {
466 fprintf( stderr, "Error in structural alignment\n" );
467 exit( 1 );
468 }
469 sprintf( com, "awk '/^REMARK/,/^TER/' %s.pdbpair > %s.%s-x-%s.%s.pdbpair", pairid, fname1, chain1, fname2, chain2 );
470 res = system( com );
471
472 sprintf( com, "awk '/^REMARK/,/^TER/{next} 1' %s.pdbpair > %s.%s-x-%s.%s.pdbpair", pairid, fname2, chain2, fname1, chain1 );
473 res = system( com );
474
475 sprintf( com, "rm %s.pdbpair", pairid );
476 res = system( com );
477
478
479 }
480 else
481 #endif
482 {
483 fprintf( stderr, "Use cache! cachefile = %s\n", cachefile );
484 sprintf( com, "cat %s > %s.rashout", cachefile, pairid );
485 system( com );
486 }
487
488 if( usecache && runnow )
489 {
490 fprintf( stderr, "Okashii! usechache=%d, runnow=%d\n", usecache, runnow );
491 exit( 1 );
492 }
493
494 sprintf( com, "%s.rashout", pairid );
495 fp = fopen( com, "r" );
496 if( !fp )
497 {
498 fprintf( stderr, "Cannot open %s\n", com );
499 exit( 1 );
500 }
501
502 readrash( fp, *mseq1, *mseq2, equiv );
503
504 fclose( fp );
505
506 // fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
507 // fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
508
509
510 value = (double)naivepairscore11( *mseq1, *mseq2, penalty );
511
512 return( value );
513 }
514
preparetmalign(FILE * fp,char *** strfiles,char *** chainids,char *** seqpt,char *** mseq1pt,char *** mseq2pt,double ** equivpt,int * alloclenpt)515 static void preparetmalign( FILE *fp, char ***strfiles, char ***chainids, char ***seqpt, char ***mseq1pt, char ***mseq2pt, double **equivpt, int *alloclenpt )
516 {
517 int i, res;
518 char *dumseq;
519 char line[1000];
520 char fname[1000];
521 char command[1000];
522 int linenum, istr, nstr;
523 FILE *checkfp;
524 char *sline;
525 int use[10000];
526 linenum = 0;
527 nstr = 0;
528 while( 1 )
529 {
530 fgets( line, 999, fp );
531 if( feof( fp ) ) break;
532 sline = strip( line );
533 use[linenum] = 1;
534 if( sline[0] == '#' || strlen( sline ) < 2 )
535 {
536 use[linenum] = 0;
537 linenum++;
538 continue;
539 }
540 extractfirstword( sline );
541 checkfp = fopen( sline, "r" );
542 if( checkfp == NULL )
543 {
544 fprintf( stderr, "Cannot open %s.\n", sline );
545 exit( 1 );
546 }
547 #if 0
548 fgets( linec, 999, checkfp );
549 if( strncmp( "HEADER ", linec, 7 ) )
550 {
551 fprintf( stderr, "Check the format of %s.\n", sline );
552 exit( 1 );
553 }
554 #endif
555 if( checkcbeta( checkfp ) )
556 {
557 fprintf( stderr, "%s has no C-beta atoms.\n", sline );
558 exit( 1 );
559 }
560 else
561 nstr++;
562 fclose( checkfp );
563 linenum++;
564 }
565 njob = nstr;
566 fprintf( stderr, "nstr = %d\n", nstr );
567
568 *strfiles = AllocateCharMtx( nstr, 1000 );
569 *chainids = AllocateCharMtx( nstr, 2 );
570
571 rewind( fp );
572 istr = 0;
573 linenum = 0;
574 while( 1 )
575 {
576 fgets( line, 999, fp );
577 if( feof( fp ) ) break;
578 sline = strip( line );
579 if( use[linenum++] )
580 {
581 (*chainids)[istr][0] = getchainid( sline );
582 (*chainids)[istr][1] = 0;
583 extractfirstword( sline );
584 sprintf( fname, "%s", sline );
585 cutpath( fname );
586 sprintf( command, "cp %s %s.pdb", sline, fname );
587 system( command );
588 sprintf( command, "perl \"%s/clean.pl\" %s.pdb", whereispairalign, fname );
589 res = system( command );
590 if( res )
591 {
592 fprintf( stderr, "error: Install clean.pl\n" );
593 exit( 1 );
594 }
595 strcpy( (*strfiles)[istr++], fname );
596 }
597 }
598
599 *seqpt = AllocateCharMtx( njob, nlenmax*2+1 );
600 *mseq1pt = AllocateCharMtx( njob, 0 );
601 *mseq2pt = AllocateCharMtx( njob, 0 );
602 *equivpt = AllocateDoubleVec( nlenmax*2+1 );
603 *alloclenpt = nlenmax*2;
604 dumseq = AllocateCharVec( nlenmax*2+1 );
605 alreadyoutput = AllocateIntVec( njob );
606 for( i=0; i<njob; i++ ) alreadyoutput[i] = 0;
607
608 for( i=0; i<istr; i++ )
609 {
610 fprintf( stderr, "i=%d\n", i );
611 (*seqpt)[i][0] = 0;
612
613 (*mseq1pt)[0] = (*seqpt)[i];
614 (*mseq2pt)[0] = dumseq;
615
616 calltmalign( *mseq1pt, *mseq2pt, *equivpt, (*strfiles)[i], (*chainids)[i], (*strfiles)[i], (*chainids)[i], *alloclenpt );
617 fprintf( stdout, ">%d_%s-%s\n%s\n", i+1, (*strfiles)[i], (*chainids)[i], (*seqpt)[i] );
618 alreadyoutput[i] = 1;
619 }
620 }
621
prepareash(FILE * fp,char * inputfile,char *** strfiles,char *** chainids,char *** seqpt,char *** mseq1pt,char *** mseq2pt,double ** equivpt,int * alloclenpt)622 static void prepareash( FILE *fp, char *inputfile, char ***strfiles, char ***chainids, char ***seqpt, char ***mseq1pt, char ***mseq2pt, double **equivpt, int *alloclenpt )
623 {
624 int i, res;
625 char *dumseq;
626 char line[1000];
627 char fname[1000];
628 char command[1000];
629 int linenum, istr, nstr;
630 // FILE *checkfp;
631 char *sline;
632 int use[10000];
633 linenum = 0;
634 nstr = 0;
635
636 fprintf( stderr, "inputfile = %s\n", inputfile );
637 while( 1 )
638 {
639 fgets( line, 999, fp );
640 if( feof( fp ) ) break;
641 sline = strip( line );
642 use[linenum] = 1;
643 if( sline[0] == '#' || strlen( sline ) < 2 )
644 {
645 use[linenum] = 0;
646 linenum++;
647 continue;
648 }
649 extractfirstword( sline );
650 #if 0
651 checkfp = fopen( sline, "r" );
652 if( checkfp == NULL )
653 {
654 fprintf( stderr, "Cannot open %s.\n", sline );
655 exit( 1 );
656 }
657 if( checkcbeta( checkfp ) )
658 {
659 fprintf( stderr, "%s has no C-beta atoms.\n", sline );
660 exit( 1 );
661 }
662 else
663 nstr++;
664 fclose( checkfp );
665 #else
666 nstr++;
667 #endif
668 linenum++;
669 }
670 njob = nstr;
671 fprintf( stderr, "nstr = %d\n", nstr );
672
673 *strfiles = AllocateCharMtx( nstr, 1000 );
674 *chainids = AllocateCharMtx( nstr, 2 );
675
676 rewind( fp );
677 istr = 0;
678 linenum = 0;
679 while( 1 )
680 {
681 fgets( line, 999, fp );
682 if( feof( fp ) ) break;
683 sline = strip( line );
684 fprintf( stderr, "sline = %s\n", sline );
685 if( use[linenum++] )
686 {
687 (*chainids)[istr][0] = getchainid( sline );
688 (*chainids)[istr][1] = 0;
689 extractfirstword( sline );
690 sprintf( fname, "%s", sline );
691 cutpath( fname );
692 #if 0
693 sprintf( command, "cp %s %s.pdb", sline, fname );
694 system( command );
695 sprintf( command, "perl \"%s/clean.pl\" %s.pdb", whereispairalign, fname );
696 res = system( command );
697 if( res )
698 {
699 fprintf( stderr, "error: Install clean.pl\n" );
700 exit( 1 );
701 }
702 #endif
703 strcpy( (*strfiles)[istr++], fname );
704 }
705 }
706
707 *seqpt = AllocateCharMtx( njob, nlenmax*2+1 );
708 *mseq1pt = AllocateCharMtx( njob, 0 );
709 *mseq2pt = AllocateCharMtx( njob, 0 );
710 *equivpt = AllocateDoubleVec( nlenmax*2+1 );
711 *alloclenpt = nlenmax*2;
712 dumseq = AllocateCharVec( nlenmax*2+1 );
713 alreadyoutput = AllocateIntVec( njob );
714 for( i=0; i<njob; i++ ) alreadyoutput[i] = 0;
715
716 fprintf( stderr, "Running pdp_ash_batch.pl..\n" );
717 // sprintf( command, "/opt/protein/share/domains/code/pdp_ash/pdp_ash_batch.pl -f %s -d tmp -i %d", inputfile, wheretooutput );
718 sprintf( command, "/opt/protein/share/mafftash/pdp_ash/pdp_ash_batch.pl -f %s -d %s -i %s", inputfile, pdir, odir );
719 res = system( command );
720 if( res )
721 {
722 fprintf( stderr, "Ask KM!\n" );
723 exit( 1 );
724 }
725 fprintf( stderr, "done\n" );
726
727
728 for( i=0; i<istr; i++ )
729 {
730 fprintf( stderr, "i=%d\n", i );
731 (*seqpt)[i][0] = 0;
732
733 (*mseq1pt)[0] = (*seqpt)[i];
734 (*mseq2pt)[0] = dumseq;
735
736 callrash( i, i, *mseq1pt, *mseq2pt, *equivpt, (*strfiles)[i], (*strfiles)[i], *alloclenpt );
737 fprintf( stdout, ">%d_%s\n%s\n", i+1, (*strfiles)[i], (*seqpt)[i] );
738 alreadyoutput[i] = 1;
739 }
740 }
741
arguments(int argc,char * argv[])742 void arguments( int argc, char *argv[] )
743 {
744 int c;
745
746 usecache = 0;
747 scale = 1.0;
748 equivthreshold = 5;
749 equivwinsize = 5;
750 equivshortestlen = 1;
751 inputfile = NULL;
752 fftkeika = 0;
753 pslocal = -1000.0;
754 constraint = 0;
755 nblosum = 62;
756 fmodel = 0;
757 calledByXced = 0;
758 devide = 0;
759 use_fft = 0;
760 fftscore = 1;
761 fftRepeatStop = 0;
762 fftNoAnchStop = 0;
763 weight = 3;
764 utree = 1;
765 tbutree = 1;
766 refine = 0;
767 check = 1;
768 cut = 0.0;
769 disp = 0;
770 outgap = 1;
771 alg = 'R';
772 mix = 0;
773 tbitr = 0;
774 scmtd = 5;
775 tbweight = 0;
776 tbrweight = 3;
777 checkC = 0;
778 treemethod = 'x';
779 contin = 0;
780 scoremtx = 1;
781 kobetsubunkatsu = 0;
782 divpairscore = 0;
783 dorp = NOTSPECIFIED;
784 ppenalty = NOTSPECIFIED;
785 ppenalty_OP = NOTSPECIFIED;
786 ppenalty_ex = NOTSPECIFIED;
787 ppenalty_EX = NOTSPECIFIED;
788 poffset = NOTSPECIFIED;
789 kimuraR = NOTSPECIFIED;
790 pamN = NOTSPECIFIED;
791 geta2 = GETA2;
792 fftWinSize = NOTSPECIFIED;
793 fftThreshold = NOTSPECIFIED;
794 RNAppenalty = NOTSPECIFIED;
795 RNApthr = NOTSPECIFIED;
796 odir = "";
797 pdir = "";
798
799 while( --argc > 0 && (*++argv)[0] == '-' )
800 {
801 while ( ( c = *++argv[0] ) )
802 {
803 switch( c )
804 {
805 case 'i':
806 inputfile = *++argv;
807 fprintf( stderr, "inputfile = %s\n", inputfile );
808 --argc;
809 goto nextoption;
810 case 'f':
811 ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
812 --argc;
813 goto nextoption;
814 case 'g':
815 ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
816 --argc;
817 goto nextoption;
818 case 'O':
819 ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 );
820 --argc;
821 goto nextoption;
822 case 'E':
823 ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 );
824 --argc;
825 goto nextoption;
826 case 'h':
827 poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
828 --argc;
829 goto nextoption;
830 case 'k':
831 kimuraR = myatoi( *++argv );
832 // fprintf( stderr, "kimuraR = %d\n", kimuraR );
833 --argc;
834 goto nextoption;
835 case 'b':
836 nblosum = myatoi( *++argv );
837 scoremtx = 1;
838 // fprintf( stderr, "blosum %d\n", nblosum );
839 --argc;
840 goto nextoption;
841 case 'j':
842 pamN = myatoi( *++argv );
843 scoremtx = 0;
844 TMorJTT = JTT;
845 fprintf( stderr, "jtt %d\n", pamN );
846 --argc;
847 goto nextoption;
848 case 'm':
849 pamN = myatoi( *++argv );
850 scoremtx = 0;
851 TMorJTT = TM;
852 fprintf( stderr, "TM %d\n", pamN );
853 --argc;
854 goto nextoption;
855 case 'd':
856 whereispairalign = *++argv;
857 fprintf( stderr, "whereispairalign = %s\n", whereispairalign );
858 --argc;
859 goto nextoption;
860 case 'o':
861 odir = *++argv;
862 fprintf( stderr, "odir = %s\n", odir );
863 --argc;
864 goto nextoption;
865 case 'p':
866 pdir = *++argv;
867 fprintf( stderr, "pdir = %s\n", pdir );
868 --argc;
869 goto nextoption;
870 case 't':
871 equivthreshold = myatoi( *++argv );
872 --argc;
873 goto nextoption;
874 case 'w':
875 equivwinsize = myatoi( *++argv );
876 --argc;
877 goto nextoption;
878 case 'l':
879 equivshortestlen = myatoi( *++argv );
880 --argc;
881 goto nextoption;
882 case 's':
883 scale = atof( *++argv );
884 --argc;
885 goto nextoption;
886 case 'c':
887 usecache = 1;
888 break;
889 #if 1
890 case 'a':
891 fmodel = 1;
892 break;
893 #endif
894 case 'r':
895 fmodel = -1;
896 break;
897 case 'D':
898 dorp = 'd';
899 break;
900 case 'P':
901 dorp = 'p';
902 break;
903 case 'e':
904 fftscore = 0;
905 break;
906 #if 0
907 case 'O':
908 fftNoAnchStop = 1;
909 break;
910 #endif
911 case 'Q':
912 calledByXced = 1;
913 break;
914 case 'x':
915 disp = 1;
916 break;
917 #if 0
918 case 'a':
919 alg = 'a';
920 break;
921 #endif
922 case 'S':
923 alg = 'S';
924 break;
925 case 'L':
926 alg = 'L';
927 break;
928 case 'B':
929 alg = 'B';
930 break;
931 case 'T':
932 alg = 'T';
933 break;
934 case 'H':
935 alg = 'H';
936 break;
937 case 'M':
938 alg = 'M';
939 break;
940 case 'R':
941 alg = 'R';
942 break;
943 case 'N':
944 alg = 'N';
945 break;
946 case 'K':
947 alg = 'K';
948 break;
949 case 'A':
950 alg = 'A';
951 break;
952 case 'V':
953 alg = 'V';
954 break;
955 case 'C':
956 alg = 'C';
957 break;
958 case 'F':
959 use_fft = 1;
960 break;
961 case 'v':
962 tbrweight = 3;
963 break;
964 case 'y':
965 divpairscore = 1;
966 break;
967 /* Modified 01/08/27, default: user tree */
968 case 'J':
969 tbutree = 0;
970 break;
971 /* modification end. */
972 #if 0
973 case 'z':
974 fftThreshold = myatoi( *++argv );
975 --argc;
976 goto nextoption;
977 case 'w':
978 fftWinSize = myatoi( *++argv );
979 --argc;
980 goto nextoption;
981 case 'Z':
982 checkC = 1;
983 break;
984 #endif
985 default:
986 fprintf( stderr, "illegal option %c\n", c );
987 argc = 0;
988 break;
989 }
990 }
991 nextoption:
992 ;
993 }
994 if( argc == 1 )
995 {
996 cut = atof( (*argv) );
997 argc--;
998 }
999 if( argc != 0 )
1000 {
1001 fprintf( stderr, "options: Check source file !\n" );
1002 exit( 1 );
1003 }
1004 if( tbitr == 1 && outgap == 0 )
1005 {
1006 fprintf( stderr, "conflicting options : o, m or u\n" );
1007 exit( 1 );
1008 }
1009 if( alg == 'C' && outgap == 0 )
1010 {
1011 fprintf( stderr, "conflicting options : C, o\n" );
1012 exit( 1 );
1013 }
1014 }
1015
countamino(char * s,int end)1016 int countamino( char *s, int end )
1017 {
1018 int val = 0;
1019 while( end-- )
1020 if( *s++ != '-' ) val++;
1021 return( val );
1022 }
1023
pairalign(char ** name,int nlen[M],char ** seq,char ** aseq,char ** mseq1,char ** mseq2,double * equiv,double * effarr,char ** strfiles,char ** chainids,int alloclen)1024 static void pairalign( char **name, int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *equiv, double *effarr, char **strfiles, char **chainids, int alloclen )
1025 {
1026 int i, j, ilim;
1027 int clus1, clus2;
1028 int off1, off2;
1029 double pscore = 0.0; // by D.Mathog
1030 static char *indication1, *indication2;
1031 FILE *hat2p, *hat3p;
1032 static double **distancemtx;
1033 static double *effarr1 = NULL;
1034 static double *effarr2 = NULL;
1035 char *pt;
1036 char *hat2file = "hat2";
1037 LocalHom **localhomtable, *tmpptr;
1038 static char **pair;
1039 // int intdum;
1040 double bunbo;
1041 char **checkseq;
1042
1043
1044 localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
1045 for( i=0; i<njob; i++)
1046 {
1047
1048 localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
1049 for( j=0; j<njob; j++)
1050 {
1051 localhomtable[i][j].start1 = -1;
1052 localhomtable[i][j].end1 = -1;
1053 localhomtable[i][j].start2 = -1;
1054 localhomtable[i][j].end2 = -1;
1055 localhomtable[i][j].opt = -1.0;
1056 localhomtable[i][j].next = NULL;
1057 localhomtable[i][j].nokori = 0;
1058 }
1059 }
1060
1061 if( effarr1 == NULL )
1062 {
1063 distancemtx = AllocateDoubleMtx( njob, njob );
1064 effarr1 = AllocateDoubleVec( njob );
1065 effarr2 = AllocateDoubleVec( njob );
1066 indication1 = AllocateCharVec( 150 );
1067 indication2 = AllocateCharVec( 150 );
1068 checkseq = AllocateCharMtx( njob, alloclen );
1069 #if 0
1070 #else
1071 pair = AllocateCharMtx( njob, njob );
1072 #endif
1073 }
1074
1075 #if 0
1076 fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
1077 #endif
1078
1079 #if 0
1080 for( i=0; i<njob; i++ )
1081 fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
1082 #endif
1083
1084
1085 // writePre( njob, name, nlen, aseq, 0 );
1086
1087 for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) pair[i][j] = 0;
1088 for( i=0; i<njob; i++ ) pair[i][i] = 1;
1089
1090 for( i=0; i<njob; i++ )
1091 {
1092 strcpy( checkseq[i], seq[i] );
1093 // fprintf( stderr, "checkseq[%d] = %s\n", i, checkseq[i] );
1094 }
1095
1096
1097 ilim = njob - 1;
1098 for( i=0; i<ilim; i++ )
1099 {
1100 fprintf( stderr, "% 5d / %d\r", i, njob );
1101
1102
1103 for( j=i+1; j<njob; j++ )
1104 {
1105
1106
1107 #if 0
1108 if( strlen( seq[i] ) == 0 || strlen( seq[j] ) == 0 )
1109 {
1110 distancemtx[i][j] = pscore;
1111 continue;
1112 }
1113 #endif
1114
1115 strcpy( aseq[i], seq[i] );
1116 strcpy( aseq[j], seq[j] );
1117 clus1 = conjuctionfortbfast_old( pair, i, aseq, mseq1, effarr1, effarr, indication1 );
1118 clus2 = conjuctionfortbfast_old( pair, j, aseq, mseq2, effarr2, effarr, indication2 );
1119 // fprintf( stderr, "mseq1 = %s\n", mseq1[0] );
1120 // fprintf( stderr, "mseq2 = %s\n", mseq2[0] );
1121
1122 #if 0
1123 fprintf( stderr, "group1 = %.66s", indication1 );
1124 fprintf( stderr, "\n" );
1125 fprintf( stderr, "group2 = %.66s", indication2 );
1126 fprintf( stderr, "\n" );
1127 #endif
1128 // for( l=0; l<clus1; l++ ) fprintf( stderr, "## STEP-eff for mseq1-%d %f\n", l, effarr1[l] );
1129
1130 #if 1
1131 {
1132 switch( alg )
1133 {
1134 case( 'T' ):
1135 fprintf( stderr, " Calling tmalign %d-%d/%d \r", i+1, j+1, njob );
1136 pscore = calltmalign( mseq1, mseq2, equiv, strfiles[i], chainids[i], strfiles[j], chainids[j], alloclen );
1137 off1 = off2 = 0;
1138 break;
1139 case( 'R' ):
1140 fprintf( stderr, " Calling PDP_ASH.pl %d-%d/%d \r", i+1, j+1, njob );
1141 pscore = callrash( i, j, mseq1, mseq2, equiv, strfiles[i], strfiles[j], alloclen );
1142 off1 = off2 = 0;
1143 break;
1144 ErrorExit( "ERROR IN SOURCE FILE" );
1145 }
1146 }
1147 #endif
1148 distancemtx[i][j] = pscore;
1149 #if SCOREOUT
1150 fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j );
1151 #endif
1152
1153 putlocalhom_str( mseq1[0], mseq2[0], equiv, scale, localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
1154 #if 1
1155
1156 if( alreadyoutput[i] == 0 )
1157 {
1158 alreadyoutput[i] = 1;
1159 gappick0( seq[i], mseq1[0] );
1160 fprintf( stdout, ">%d_%s\n%s\n", i+1, strfiles[i], seq[i] );
1161 strcpy( checkseq[i], seq[i] );
1162 }
1163 else
1164 {
1165 gappick0( seq[i], mseq1[0] );
1166 fprintf( stderr, "checking seq%d\n", i );
1167
1168 // fprintf( stderr, " seq=%s\n", seq[i] );
1169 // fprintf( stderr, "checkseq=%s\n", checkseq[i] );
1170
1171 if( strcmp( checkseq[i], seq[i] ) )
1172 {
1173 fprintf( stderr, "\n\nWARNING: Sequence changed!!\n" );
1174 fprintf( stderr, "i=%d\n", i );
1175 fprintf( stderr, " seq=%s\n", seq[i] );
1176 fprintf( stderr, "checkseq=%s\n", checkseq[i] );
1177 exit( 1 );
1178 }
1179 }
1180 if( alreadyoutput[j] == 0 )
1181 {
1182 alreadyoutput[j] = 1;
1183 gappick0( seq[j], mseq2[0] );
1184 fprintf( stdout, ">%d_%s-%s\n%s\n", j+1, strfiles[j], chainids[j], seq[j] );
1185 strcpy( checkseq[j], seq[j] );
1186 }
1187 else
1188 {
1189 gappick0( seq[j], mseq2[0] );
1190 fprintf( stderr, "checking seq%d\n", j );
1191 if( strcmp( checkseq[j], seq[j] ) )
1192 {
1193 fprintf( stderr, "\n\nWARNING: Sequence changed!!\n" );
1194 fprintf( stderr, "j=%d\n", j );
1195 fprintf( stderr, " seq=%s\n", seq[j] );
1196 fprintf( stderr, "checkseq=%s\n", checkseq[j] );
1197 exit( 1 );
1198 }
1199 }
1200 #endif
1201 }
1202 }
1203 for( i=0; i<njob; i++ )
1204 {
1205 pscore = 0.0;
1206 for( pt=seq[i]; *pt; pt++ )
1207 pscore += amino_dis[(int)*pt][(int)*pt];
1208 distancemtx[i][i] = pscore;
1209
1210 }
1211
1212 ilim = njob-1;
1213 for( i=0; i<ilim; i++ )
1214 {
1215 for( j=i+1; j<njob; j++ )
1216 {
1217 bunbo = MIN( distancemtx[i][i], distancemtx[j][j] );
1218 if( bunbo == 0.0 )
1219 distancemtx[i][j] = 2.0;
1220 else
1221 distancemtx[i][j] = ( 1.0 - distancemtx[i][j] / bunbo ) * 2.0;
1222 }
1223 }
1224
1225 hat2p = fopen( hat2file, "w" );
1226 if( !hat2p ) ErrorExit( "Cannot open hat2." );
1227 WriteHat2_pointer( hat2p, njob, name, distancemtx );
1228 fclose( hat2p );
1229
1230 fprintf( stderr, "##### writing hat3\n" );
1231 hat3p = fopen( "hat3", "w" );
1232 if( !hat3p ) ErrorExit( "Cannot open hat3." );
1233 ilim = njob-1;
1234 for( i=0; i<ilim; i++ )
1235 {
1236 for( j=i+1; j<njob; j++ )
1237 {
1238 for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
1239 {
1240 if( tmpptr->opt == -1.0 ) continue;
1241 fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d k\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 );
1242 }
1243 }
1244 }
1245 fclose( hat3p );
1246 #if DEBUG
1247 fprintf( stderr, "calling FreeLocalHomTable\n" );
1248 #endif
1249 FreeLocalHomTable( localhomtable, njob );
1250 #if DEBUG
1251 fprintf( stderr, "done. FreeLocalHomTable\n" );
1252 #endif
1253 }
1254
WriteOptions(FILE * fp)1255 static void WriteOptions( FILE *fp )
1256 {
1257
1258 if( dorp == 'd' ) fprintf( fp, "DNA\n" );
1259 else
1260 {
1261 if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
1262 else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
1263 else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
1264 }
1265 fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1266 if( use_fft ) fprintf( fp, "FFT on\n" );
1267
1268 fprintf( fp, "tree-base method\n" );
1269 if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
1270 else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
1271 if( tbitr || tbweight )
1272 {
1273 fprintf( fp, "iterate at each step\n" );
1274 if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
1275 if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
1276 if( tbweight ) fprintf( fp, " weighted\n" );
1277 fprintf( fp, "\n" );
1278 }
1279
1280 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
1281
1282 if( alg == 'a' )
1283 fprintf( fp, "Algorithm A\n" );
1284 else if( alg == 'A' )
1285 fprintf( fp, "Algorithm A+\n" );
1286 else if( alg == 'S' )
1287 fprintf( fp, "Apgorithm S\n" );
1288 else if( alg == 'C' )
1289 fprintf( fp, "Apgorithm A+/C\n" );
1290 else
1291 fprintf( fp, "Unknown algorithm\n" );
1292
1293 if( use_fft )
1294 {
1295 fprintf( fp, "FFT on\n" );
1296 if( dorp == 'd' )
1297 fprintf( fp, "Basis : 4 nucleotides\n" );
1298 else
1299 {
1300 if( fftscore )
1301 fprintf( fp, "Basis : Polarity and Volume\n" );
1302 else
1303 fprintf( fp, "Basis : 20 amino acids\n" );
1304 }
1305 fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
1306 fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
1307 }
1308 else
1309 fprintf( fp, "FFT off\n" );
1310 fflush( fp );
1311 }
1312
1313
main(int argc,char * argv[])1314 int main( int argc, char *argv[] )
1315 {
1316 static int nlen[M];
1317 static char **name, **seq;
1318 static char **mseq1, **mseq2;
1319 static char **aseq;
1320 static char **bseq;
1321 static double *eff;
1322 static double *equiv;
1323 char **strfiles;
1324 char **chainids;
1325 int i;
1326 FILE *infp;
1327 char c;
1328 int alloclen;
1329
1330 arguments( argc, argv );
1331
1332 if( equivthreshold < 1 || 9 < equivthreshold )
1333 {
1334 fprintf( stderr, "-t n, n must be 1..9\n" );
1335 exit( 1 );
1336 }
1337
1338 if( ( equivwinsize + 1 ) % 2 != 0 )
1339 {
1340 fprintf( stderr, "equivwinsize = %d\n", equivwinsize );
1341 fprintf( stderr, "It must be an odd number.\n" );
1342 exit( 1 );
1343 }
1344
1345 if( inputfile )
1346 {
1347 infp = fopen( inputfile, "r" );
1348 if( !infp )
1349 {
1350 fprintf( stderr, "Cannot open %s\n", inputfile );
1351 exit( 1 );
1352 }
1353 }
1354 else
1355 infp = stdin;
1356
1357 nlenmax = 10000; // tekitou
1358
1359 if( alg == 'R' )
1360 prepareash( infp, inputfile, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen );
1361 else if( alg == 'T' )
1362 preparetmalign( infp, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen );
1363
1364 fclose( infp );
1365
1366 name = AllocateCharMtx( njob, B+1 );
1367 aseq = AllocateCharMtx( njob, nlenmax*2+1 );
1368 bseq = AllocateCharMtx( njob, nlenmax*2+1 );
1369 eff = AllocateDoubleVec( njob );
1370
1371 for( i=0; i<njob; i++ )
1372 {
1373 fprintf( stderr, "str%d = %s-%s\n", i, strfiles[i], chainids[i] );
1374 }
1375
1376 if( njob < 1 )
1377 {
1378 fprintf( stderr, "No structure found.\n" );
1379 exit( 1 );
1380 }
1381 if( njob < 2 )
1382 {
1383 fprintf( stderr, "Only %d structure found.\n", njob );
1384 exit( 0 );
1385 }
1386 if( njob > M )
1387 {
1388 fprintf( stderr, "The number of structures must be < %d\n", M );
1389 fprintf( stderr, "Please try sequence-based methods for such large data.\n" );
1390 exit( 1 );
1391 }
1392
1393
1394
1395 #if 0
1396 readData( infp, name, nlen, seq );
1397 #endif
1398
1399 constants( njob, seq );
1400
1401 #if 0
1402 fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
1403 #endif
1404
1405 initSignalSM();
1406
1407 initFiles();
1408
1409 WriteOptions( trap_g );
1410
1411 c = seqcheck( seq );
1412 if( c )
1413 {
1414 fprintf( stderr, "Illegal character %c\n", c );
1415 exit( 1 );
1416 }
1417
1418 // writePre( njob, name, nlen, seq, 0 );
1419
1420 for( i=0; i<njob; i++ ) eff[i] = 1.0;
1421
1422
1423 for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
1424
1425 pairalign( name, nlen, bseq, aseq, mseq1, mseq2, equiv, eff, strfiles, chainids, alloclen );
1426
1427 fprintf( trap_g, "done.\n" );
1428 #if DEBUG
1429 fprintf( stderr, "closing trap_g\n" );
1430 #endif
1431 fclose( trap_g );
1432
1433 // writePre( njob, name, nlen, aseq, !contin );
1434 #if 0
1435 writeData( stdout, njob, name, nlen, aseq );
1436 #endif
1437 #if IODEBUG
1438 fprintf( stderr, "OSHIMAI\n" );
1439 #endif
1440 SHOWVERSION;
1441 return( 0 );
1442 }
1443