1 #include "mltaln.h"
2 
3 #define DEBUG 0
4 #define IODEBUG 0
5 #define SCOREOUT 1
6 #define TSUYOSAFACTOR 100
7 
8 
9 static int nhomologs;
10 static int seedoffset;
11 
strip(char * s)12 void strip( char *s )
13 {
14 	char *pt = s;
15 	while( *++pt )
16 		if( *pt == '\n' ) *pt = 0;
17 }
18 
19 
arguments(int argc,char * argv[])20 void arguments( int argc, char *argv[] )
21 {
22     int c;
23 
24 	seedoffset = 0;
25 	nhomologs = 1;
26 	inputfile = NULL;
27 	fftkeika = 0;
28 	pslocal = -1000.0;
29 	constraint = 0;
30 	nblosum = 62;
31 	fmodel = 0;
32 	calledByXced = 0;
33 	devide = 0;
34 	use_fft = 0;
35 	fftscore = 1;
36 	fftRepeatStop = 0;
37 	fftNoAnchStop = 0;
38     weight = 3;
39     utree = 1;
40 	tbutree = 1;
41     refine = 0;
42     check = 1;
43     cut = 0.0;
44     disp = 0;
45     outgap = 1;
46     alg = 'A';
47     mix = 0;
48 	tbitr = 0;
49 	scmtd = 5;
50 	tbweight = 0;
51 	tbrweight = 3;
52 	checkC = 0;
53 	treemethod = 'x';
54 	contin = 0;
55 	scoremtx = 1;
56 	kobetsubunkatsu = 0;
57 	divpairscore = 0;
58 	dorp = NOTSPECIFIED;
59 	ppenalty = NOTSPECIFIED;
60 	ppenalty_OP = NOTSPECIFIED;
61 	ppenalty_ex = NOTSPECIFIED;
62 	ppenalty_EX = NOTSPECIFIED;
63 	poffset = NOTSPECIFIED;
64 	kimuraR = NOTSPECIFIED;
65 	pamN = NOTSPECIFIED;
66 	geta2 = GETA2;
67 	fftWinSize = NOTSPECIFIED;
68 	fftThreshold = NOTSPECIFIED;
69 
70     while( --argc > 0 && (*++argv)[0] == '-' )
71 	{
72         while ( ( c = *++argv[0] ) )
73 		{
74             switch( c )
75             {
76 				case 'i':
77 					inputfile = *++argv;
78 					fprintf( stderr, "seed = %s\n", inputfile );
79 					--argc;
80 					goto nextoption;
81 				case 't':
82 					nhomologs = myatoi( *++argv );
83 					fprintf( stderr, "nhomologs = %d\n", nhomologs );
84 					--argc;
85 					goto nextoption;
86 				case 'o':
87 					seedoffset = myatoi( *++argv );
88 					fprintf( stderr, "seedoffset = %d\n", seedoffset );
89 					--argc;
90 					goto nextoption;
91 				case 'D':
92 					dorp = 'd';
93 					break;
94 				case 'P':
95 					dorp = 'p';
96 					break;
97                 default:
98                     fprintf( stderr, "illegal option %c\n", c );
99                     argc = 0;
100                     break;
101             }
102 		}
103 		nextoption:
104 			;
105 	}
106     if( argc == 1 )
107     {
108         cut = atof( (*argv) );
109         argc--;
110     }
111     if( argc != 0 )
112     {
113         fprintf( stderr, "options: Check source file !\n" );
114         exit( 1 );
115     }
116 	if( tbitr == 1 && outgap == 0 )
117 	{
118 		fprintf( stderr, "conflicting options : o, m or u\n" );
119 		exit( 1 );
120 	}
121 	if( alg == 'C' && outgap == 0 )
122 	{
123 		fprintf( stderr, "conflicting options : C, o\n" );
124 		exit( 1 );
125 	}
126 }
127 
countamino(char * s,int end)128 int countamino( char *s, int end )
129 {
130 	int val = 0;
131 	while( end-- )
132 		if( *s++ != '-' ) val++;
133 	return( val );
134 }
135 
pairalign(char ** name,int nlen[M],char ** seq,double * effarr,int alloclen)136 static void pairalign( char **name, int nlen[M], char **seq, double *effarr, int alloclen )
137 {
138 	int i, j;
139 	FILE *hat3p;
140 	double pscore = 0.0; // by D.Mathog
141 	static double *effarr1 = NULL;
142 	static double *effarr2 = NULL;
143 	char *aseq;
144 	static char **pseq;
145 	LocalHom **localhomtable, *tmpptr;
146 	double tsuyosa;
147 
148 	if( nhomologs < 1 ) nhomologs = 1; // tsuyosa=0.0 wo sakeru
149 	tsuyosa = (double)nhomologs * nhomologs * TSUYOSAFACTOR;
150 	fprintf( stderr, "tsuyosa = %f\n", tsuyosa );
151 	localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
152 	for( i=0; i<njob; i++)
153 	{
154 		localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
155 		for( j=0; j<njob; j++)
156 		{
157 			localhomtable[i][j].start1 = -1;
158 			localhomtable[i][j].end1 = -1;
159 			localhomtable[i][j].start2 = -1;
160 			localhomtable[i][j].end2 = -1;
161 			localhomtable[i][j].opt = -1.0;
162 			localhomtable[i][j].next = NULL;
163 		}
164 	}
165 
166 	if( effarr1 == NULL )
167 	{
168 		effarr1 = AllocateDoubleVec( njob );
169 		effarr2 = AllocateDoubleVec( njob );
170 		pseq = AllocateCharMtx( 2, 0 );
171 		aseq = AllocateCharVec( nlenmax*9+1 );
172 #if 0
173 #else
174 #endif
175 	}
176 
177 #if 0
178 	fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
179 #endif
180 
181 #if 0
182 	for( i=0; i<njob; i++ )
183 		fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
184 #endif
185 
186 
187 //	writePre( njob, name, nlen, aseq, 0 );
188 
189 	hat3p = fopen( "hat3", "w" );
190 	if( !hat3p ) ErrorExit( "Cannot open hat3." );
191 	fprintf( stderr, "\n" );
192 	for( i=0; i<njob-1; i++ )
193 	{
194 		for( j=i+1; j<njob; j++ )
195 		{
196 			pseq[0] = seq[i];
197 			pseq[1] = seq[j];
198 
199 			if( strlen( pseq[0] ) != strlen( pseq[1] ) )
200 			{
201 				fprintf( stderr, "## ERROR  ###\n" );
202 				fprintf( stderr, "Not aligned,  %s - %s\n", name[i], name[j] );
203 				fprintf( stderr, "## ERROR  ###\n" );
204 				exit( 1 );
205 			}
206 
207 
208 			fprintf( stderr, "adding %d-%d\r", i, j );
209 			putlocalhom2( pseq[0], pseq[1], localhomtable[i]+j, 0, 0, (int)pscore, strlen( pseq[0] ) );
210 			for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
211 			{
212 				if( tmpptr->opt == -1.0 ) continue;
213 				if( tmpptr->start1 == -1 ) continue;
214 				fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d k\n", i+seedoffset, j+seedoffset, tmpptr->overlapaa, tmpptr->opt * tsuyosa, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 );
215 			}
216 		}
217 	}
218 	fprintf( stderr, "\n" );
219 	fclose( hat3p );
220 
221 #if DEBUG
222 	fprintf( stderr, "calling FreeLocalHomTable\n" );
223 #endif
224 	FreeLocalHomTable( localhomtable, njob );
225 #if DEBUG
226 	fprintf( stderr, "done. FreeLocalHomTable\n" );
227 #endif
228 }
229 
WriteOptions(FILE * fp)230 static void WriteOptions( FILE *fp )
231 {
232 
233 	if( dorp == 'd' ) fprintf( fp, "DNA\n" );
234 	else
235 	{
236 		if     ( scoremtx ==  0 ) fprintf( fp, "JTT %dPAM\n", pamN );
237 		else if( scoremtx ==  1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
238 		else if( scoremtx ==  2 ) fprintf( fp, "M-Y\n" );
239 	}
240     fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
241     if( use_fft ) fprintf( fp, "FFT on\n" );
242 
243 	fprintf( fp, "tree-base method\n" );
244 	if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
245 	else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
246 	if( tbitr || tbweight )
247 	{
248 		fprintf( fp, "iterate at each step\n" );
249 		if( tbitr && tbrweight == 0 ) fprintf( fp, "  unweighted\n" );
250 		if( tbitr && tbrweight == 3 ) fprintf( fp, "  reversely weighted\n" );
251 		if( tbweight ) fprintf( fp, "  weighted\n" );
252 		fprintf( fp, "\n" );
253 	}
254 
255    	 fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
256 
257 	if( alg == 'a' )
258 		fprintf( fp, "Algorithm A\n" );
259 	else if( alg == 'A' )
260 		fprintf( fp, "Algorithm A+\n" );
261 	else if( alg == 'S' )
262 		fprintf( fp, "Apgorithm S\n" );
263 	else if( alg == 'C' )
264 		fprintf( fp, "Apgorithm A+/C\n" );
265 	else
266 		fprintf( fp, "Unknown algorithm\n" );
267 
268 	if( treemethod == 'x' )
269 		fprintf( fp, "Tree = UPGMA (3).\n" );
270 	else if( treemethod == 's' )
271 		fprintf( fp, "Tree = UPGMA (2).\n" );
272 	else if( treemethod == 'p' )
273 		fprintf( fp, "Tree = UPGMA (1).\n" );
274 	else
275 		fprintf( fp, "Unknown tree.\n" );
276 
277     if( use_fft )
278     {
279         fprintf( fp, "FFT on\n" );
280         if( dorp == 'd' )
281             fprintf( fp, "Basis : 4 nucleotides\n" );
282         else
283         {
284             if( fftscore )
285                 fprintf( fp, "Basis : Polarity and Volume\n" );
286             else
287                 fprintf( fp, "Basis : 20 amino acids\n" );
288         }
289         fprintf( fp, "Threshold   of anchors = %d%%\n", fftThreshold );
290         fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
291     }
292 	else
293         fprintf( fp, "FFT off\n" );
294 	fflush( fp );
295 }
296 
297 
main(int argc,char * argv[])298 int main( int argc, char *argv[] )
299 {
300 	static int  nlen[M];
301 	static char **name, **seq;
302 	static char **bseq;
303 	static double *eff;
304 	int i;
305 	char c;
306 	int alloclen;
307 	FILE *infp;
308 
309 	arguments( argc, argv );
310 
311 	if( inputfile )
312 	{
313 		infp = fopen( inputfile, "r" );
314 		if( !infp )
315 		{
316 			fprintf( stderr, "Cannot open %s\n", inputfile );
317 			exit( 1 );
318 		}
319 	}
320 	else
321 		infp = stdin;
322 
323 	getnumlen( infp );
324 	rewind( infp );
325 
326 	if( njob < 2 )
327 	{
328 		fprintf( stderr, "At least 2 sequences should be input!\n"
329 						 "Only %d sequence found.\n", njob );
330 		exit( 1 );
331 	}
332 
333 	name = AllocateCharMtx( njob, B+1 );
334 	seq = AllocateCharMtx( njob, nlenmax*9+1 );
335 	bseq = AllocateCharMtx( njob, nlenmax*9+1 );
336 	alloclen = nlenmax*9;
337 
338 	eff = AllocateDoubleVec( njob );
339 
340 #if 0
341 	Read( name, nlen, seq );
342 #else
343 	readData_pointer( infp, name, nlen, seq );
344 #endif
345 	fclose( infp );
346 
347 	constants( njob, seq );
348 
349 #if 0
350 	fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
351 #endif
352 
353 	initSignalSM();
354 
355 	initFiles();
356 
357 	WriteOptions( trap_g );
358 
359 	c = seqcheck( seq );
360 	if( c )
361 	{
362 		fprintf( stderr, "Illeagal character %c\n", c );
363 		exit( 1 );
364 	}
365 
366 //	writePre( njob, name, nlen, seq, 0 );
367 
368 	for( i=0; i<njob; i++ ) eff[i] = 1.0;
369 
370 
371 	for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
372 
373 
374 //	for( i=0; i<njob; i++ ) fprintf( stdout, ">_seed_%s\n%s\n", name[i]+1, bseq[i] ); // CHUUI!!
375 	for( i=0; i<njob; i++ ) fprintf( stdout, ">_seed_%s\n%s\n", name[i]+1, seq[i] );
376 
377 	pairalign( name, nlen, seq, eff, alloclen );
378 
379 	fprintf( trap_g, "done.\n" );
380 #if DEBUG
381 	fprintf( stderr, "closing trap_g\n" );
382 #endif
383 	fclose( trap_g );
384 
385 #if IODEBUG
386 	fprintf( stderr, "OSHIMAI\n" );
387 #endif
388 	SHOWVERSION;
389 	return( 0 );
390 }
391