1 #include "mltaln.h"
2 #include "dp.h"
3 
4 #define DEBUG 0
5 #define XXXXXXX    0
6 #define USE_PENALTY_EX  0
7 
extendmseq(char ** mseq1,char ** mseq2,char ** seq1,char ** seq2,int i,int j,int prevhiti,int prevhitj)8 static void extendmseq( char **mseq1, char **mseq2, char **seq1, char **seq2, int i, int j, int prevhiti, int prevhitj )
9 {
10 //	char gap[] = "-";
11 	char *gap;
12 	gap = newgapstr;
13 	int l;
14 
15 	fprintf( stderr, "i=%d, prevhiti=%d\n", i, prevhiti );
16 	fprintf( stderr, "j=%d, prevhitj=%d\n", j, prevhitj );
17 	l = prevhiti - i - 1;
18 	fprintf( stderr, "l=%d\n", l );
19 	while( l>0 )
20 	{
21 		*--mseq1[0] = seq1[0][i+l--];
22 		*--mseq2[0] = *gap;
23 	}
24 	l= prevhitj - j - 1;
25 	fprintf( stderr, "l=%d\n", l );
26 	while( l>0 )
27 	{
28 		*--mseq1[0] = *gap;
29 		*--mseq2[0] = seq2[0][j+l--];
30 	}
31 	if( i < 0 || j < 0 ) return;
32 	*--mseq1[0] = seq1[0][i];
33 	*--mseq2[0] = seq2[0][j];
34 	fprintf( stderr, "added %c to mseq1, mseq1 = %s \n", seq1[0][i], mseq1[0] );
35 	fprintf( stderr, "added %c to mseq2, mseq2 = %s \n", seq2[0][j], mseq2[0] );
36 }
37 
match_calc(double * match,char ** s1,char ** s2,int i1,int lgth2)38 static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 )
39 {
40 	char tmpc = s1[0][i1];
41 	char *seq2 = s2[0];
42 
43 	while( lgth2-- )
44 		*match++ = amino_dis[(int)tmpc][(int)*seq2++];
45 }
46 
Atracking(double * lasthorizontalw,double * lastverticalw,char ** seq1,char ** seq2,char ** mseq1,char ** mseq2,double ** cpmx1,double ** cpmx2,int ** ijp)47 static double Atracking( double *lasthorizontalw, double *lastverticalw,
48 						char **seq1, char **seq2,
49                         char **mseq1, char **mseq2,
50                         double **cpmx1, double **cpmx2,
51                         int **ijp )
52 {
53 	int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
54 //	char gap[] = "-";
55 	char *gap;
56 	gap = newgapstr;
57 	lgth1 = strlen( seq1[0] );
58 	lgth2 = strlen( seq2[0] );
59 
60 
61 #if 0
62 	for( i=0; i<lgth1; i++ )
63 	{
64 		fprintf( stderr, "lastverticalw[%d] = %f\n", i, lastverticalw[i] );
65 	}
66 #endif
67 
68     for( i=0; i<lgth1+1; i++ )
69     {
70         ijp[i][0] = i + 1;
71     }
72     for( j=0; j<lgth2+1; j++ )
73     {
74         ijp[0][j] = -( j + 1 );
75     }
76 
77 
78 	mseq1[0] += lgth1+lgth2;
79 	*mseq1[0] = 0;
80 	mseq2[0] += lgth1+lgth2;
81 	*mseq2[0] = 0;
82 	iin = lgth1; jin = lgth2;
83 	limk = lgth1+lgth2 + 1;
84 	for( k=0; k<limk; k++ )
85 	{
86 		if( ijp[iin][jin] < 0 )
87 		{
88 			ifi = iin-1; jfi = jin+ijp[iin][jin];
89 		}
90 		else if( ijp[iin][jin] > 0 )
91 		{
92 			ifi = iin-ijp[iin][jin]; jfi = jin-1;
93 		}
94 		else
95 		{
96 			ifi = iin-1; jfi = jin-1;
97 		}
98 		l = iin - ifi;
99 		while( --l )
100 		{
101 			*--mseq1[0] = seq1[0][ifi+l];
102 			*--mseq2[0] = *gap;
103 			k++;
104 		}
105 		l= jin - jfi;
106 		while( --l )
107 		{
108 			*--mseq1[0] = *gap;
109 			*--mseq2[0] = seq2[0][jfi+l];
110 			k++;
111 		}
112 		if( iin <= 0 || jin <= 0 ) break;
113 		*--mseq1[0] = seq1[0][ifi];
114 		*--mseq2[0] = seq2[0][jfi];
115 		k++;
116 		iin = ifi; jin = jfi;
117 	}
118 	return( 0.0 );
119 }
120 
backdp(double ** WMMTX,double wmmax,double * maxinw,double * maxinh,int lgth1,int lgth2,int alloclen,double * w1,double * w2,double * initverticalw,double * m,int * mp,int iin,int jin,char ** seq1,char ** seq2,char ** mseq1,char ** mseq2)121 void backdp( double **WMMTX, double wmmax, double *maxinw, double *maxinh, int lgth1, int lgth2, int alloclen, double *w1, double *w2, double *initverticalw, double *m, int *mp, int iin, int jin, char **seq1, char **seq2, char **mseq1, char **mseq2 )
122 {
123 	register int i, j;
124 	int prevhiti, prevhitj;
125 //	int lasti, lastj;
126 	double g;
127 	double fpenalty = (double)penalty;
128 #if USE_PENALTY_EX
129 	double fpenalty_ex = (double)penalty_ex;
130 #endif
131 	double *currentw, *previousw, *wtmp;
132 	double mi;
133 	int mpi;
134 	int *mpjpt;
135 	double *mjpt, *prept, *curpt;
136 	double wm = 0.0;
137 	double forwwm;
138 
139 	currentw = w1;
140 	previousw = w2;
141 
142 	match_calc( initverticalw, seq2, seq1, lgth2-1, lgth1 );
143 	match_calc( currentw, seq1, seq2, lgth1-1, lgth2 );
144 
145 
146 	prevhiti = iin;
147 	prevhitj = jin;
148 	fprintf( stderr, "prevhiti = %d, lgth1 = %d\n", prevhiti, lgth1 );
149 	fprintf( stderr, "prevhitj = %d, lgth2 = %d\n", prevhitj, lgth2 );
150 	extendmseq( mseq1, mseq2, seq1, seq2, prevhiti, prevhitj, lgth1, lgth2 );
151 
152 	for( i=0; i<lgth1-1; i++ )
153 	{
154 		initverticalw[i] += fpenalty;
155 		WMMTX[i][lgth2-1] += fpenalty;
156 	}
157 	for( j=0; j<lgth2-1; j++ )
158 	{
159 		currentw[j] += fpenalty;
160 		WMMTX[lgth1-1][j] += fpenalty;
161 	}
162 
163 
164 #if 0
165 	fprintf( stderr, "initverticalw = \n" );
166 	for( i=0; i<lgth1; i++ )
167 		fprintf( stderr, "% 8.2f", initverticalw[i] );
168 	fprintf( stderr, "\n" );
169 	fprintf( stderr, "currentw = \n" );
170 	for( i=0; i<lgth2; i++ )
171 		fprintf( stderr, "% 8.2f", currentw[i] );
172 	fprintf( stderr, "\n" );
173 #endif
174 
175 	for( j=lgth2-1; j>0; --j )
176 	{
177 		m[j-1] = currentw[j];
178 		mp[j] = 0; // iranai
179 	}
180 
181 	for( j=0; j<lgth2; j++ ) m[j] = 0.0;
182 	//m[lgth2-1] ga, irunoka iranainoka irahai.
183 
184 	for( i=lgth1-2; i>-1; i-- )
185 	{
186 		wtmp = previousw;
187 		previousw = currentw;
188 		currentw = wtmp;
189 
190 		previousw[lgth2-1] = initverticalw[i+1];
191 
192 		match_calc( currentw, seq1, seq2, i, lgth2 );
193 
194 #if 0
195 		fprintf( stderr, "i=%d, currentw = \n", i );
196 		for( j=0; j<lgth2; j++ ) fprintf( stderr, "% 8.2f", currentw[j] );
197 		fprintf( stderr, "\n" );
198 #endif
199 
200 		currentw[lgth2-1] = initverticalw[i];
201 
202 		mi = previousw[lgth2-1];
203 		mpi = lgth2-1; //iranai
204 
205 
206 		mjpt = m + lgth2 - 2;
207 		prept = previousw + lgth2 - 1;
208 		curpt = currentw + lgth2 - 2;
209 		mpjpt = mp + lgth2 - 2;
210 
211 		for( j=lgth2-2; j>-1; j-- )
212 		{
213 
214 //			fprintf( stderr, "i,j=%d,%d %c-%c ", i, j, seq1[0][i], seq2[0][j] );
215 			wm = *prept;
216 			g = mi + fpenalty;
217 #if 0
218 			fprintf( stderr, "%5.0f?", g );
219 #endif
220 			if( g > wm )
221 			{
222 				wm = g;
223 			}
224 			g = *prept;
225 			if( g >= mi )
226 			{
227 				mi = g;
228 				mpi = j+1; //iranai
229 			}
230 #if USE_PENALTY_EX
231 			mi += fpenalty_ex;
232 #endif
233 
234 			g = *mjpt + fpenalty;
235 #if 0
236 			fprintf( stderr, "%5.0f?", g );
237 #endif
238 			if( g > wm )
239 			{
240 				wm = g;
241 			}
242 			g = *prept;
243 			if( g >= *mjpt )
244 			{
245 				*mjpt = g;
246 				*mpjpt = i-1; //iranai
247 			}
248 #if USE_PENALTY_EX
249 			m[j] += fpenalty_ex;
250 #endif
251 
252 #if 0
253 			fprintf( stderr, "*curpt = %5.0f \n", *curpt );
254 #endif
255 
256 //			forwwm = wm + MIN( maxinw[i], maxinh[j] );
257 			forwwm = wm + MIN( maxinw[i], maxinh[j] );
258 			WMMTX[i][j] = forwwm;
259 			if( forwwm == wmmax && i<prevhiti && j<prevhitj )
260 			{
261 				fprintf( stderr, "hit!\n" );
262 				extendmseq( mseq1, mseq2, seq1, seq2, i, j, prevhiti, prevhitj );
263 				if( forwwm == wmmax )
264 				{
265 					*--mseq1[0] = 'u';
266 					*--mseq2[0] = 'u';
267 				}
268 				prevhiti = i;
269 				prevhitj = j;
270 			}
271 			*curpt += wm;
272 
273 			mjpt--;
274 			prept--;
275 			mpjpt--;
276 			curpt--;
277 		}
278 	}
279 	extendmseq( mseq1, mseq2, seq1, seq2, -1, -1, prevhiti, prevhitj );
280 }
281 
282 
MSalign11(char ** seq1,char ** seq2,int alloclen)283 double MSalign11( char **seq1, char **seq2, int alloclen )
284 /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
285 {
286 //	int k;
287 	register int i, j;
288 	int lasti, lastj;
289 	int iin = 0, jin = 0;  // by Mathog, a guess
290 	int lgth1, lgth2;
291 	int resultlen;
292 	double wm = 0.0;   /* int ?????? */
293 	double g;
294 	double *currentw, *previousw;
295 	double fpenalty = (double)penalty;
296 #if USE_PENALTY_EX
297 	double fpenalty_ex = (double)penalty_ex;
298 #endif
299 	double *maxinw = NULL, *maxinwpt = NULL; // by D.Mathog, guess
300 	double *maxinh = NULL; // by D.Mathog, guess
301 #if 1
302 	double wmmax;
303 	double *wtmp;
304 	int *ijppt;
305 	double *mjpt, *prept, *curpt;
306 	int *mpjpt;
307 #endif
308 	static double mi, *m;
309 	static int **ijp;
310 	static int mpi, *mp;
311 	static double *w1, *w2;
312 	static double *match;
313 	static double *initverticalw;    /* kufuu sureba iranai */
314 	static double *lastverticalw;    /* kufuu sureba iranai */
315 	static char **mseq1;
316 	static char **mseq2;
317 	static char **mseq;
318 	static double **cpmx1;
319 	static double **cpmx2;
320 	static int **intwork;
321 	static double **WMMTX;
322 	static double **doublework;
323 	static int orlgth1 = 0, orlgth2 = 0;
324 
325 	if( orlgth1 == 0 )
326 	{
327 		mseq1 = AllocateCharMtx( njob, 0 );
328 		mseq2 = AllocateCharMtx( njob, 0 );
329 	}
330 
331 
332 	lgth1 = strlen( seq1[0] );
333 	lgth2 = strlen( seq2[0] );
334 
335 	if( lgth1 > orlgth1 || lgth2 > orlgth2 )
336 	{
337 		int ll1, ll2;
338 
339 		if( orlgth1 > 0 && orlgth2 > 0 )
340 		{
341 			FreeFloatVec( w1 );
342 			FreeFloatVec( w2 );
343 			FreeFloatVec( match );
344 			FreeFloatVec( initverticalw );
345 			FreeFloatVec( lastverticalw );
346 			FreeFloatVec( maxinw );
347 			FreeFloatVec( maxinh );
348 
349 			FreeFloatVec( m );
350 			FreeIntVec( mp );
351 
352 			FreeCharMtx( mseq );
353 
354 
355 			FreeFloatMtx( cpmx1 );
356 			FreeFloatMtx( cpmx2 );
357 
358 			FreeFloatMtx( doublework );
359 			FreeIntMtx( intwork );
360 		}
361 
362 		ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
363 		ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
364 
365 #if DEBUG
366 		fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
367 #endif
368 
369 		w1 = AllocateFloatVec( ll2+2 );
370 		w2 = AllocateFloatVec( ll2+2 );
371 		match = AllocateFloatVec( ll2+2 );
372 
373 		initverticalw = AllocateFloatVec( ll1+2 );
374 		lastverticalw = AllocateFloatVec( ll1+2 );
375 		maxinw = AllocateFloatVec( ll1+2 );
376 
377 
378 		m = AllocateFloatVec( ll2+2 );
379 		mp = AllocateIntVec( ll2+2 );
380 		maxinh = AllocateFloatVec( ll2+2 );
381 
382 		mseq = AllocateCharMtx( njob, ll1+ll2 );
383 
384 		cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
385 		cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
386 
387 		doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
388 		intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
389 
390 #if DEBUG
391 		fprintf( stderr, "succeeded\n" );
392 #endif
393 
394 		orlgth1 = ll1 - 100;
395 		orlgth2 = ll2 - 100;
396 	}
397 
398 
399 	mseq1[0] = mseq[0];
400 	mseq2[0] = mseq[1];
401 
402 
403 	if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
404 	{
405 		int ll1, ll2;
406 
407 		if( commonAlloc1 && commonAlloc2 )
408 		{
409 			FreeIntMtx( commonIP );
410 			FreeFloatMtx( WMMTX );
411 		}
412 
413 		ll1 = MAX( orlgth1, commonAlloc1 );
414 		ll2 = MAX( orlgth2, commonAlloc2 );
415 
416 #if DEBUG
417 		fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
418 #endif
419 
420 		commonIP = AllocateIntMtx( ll1+10, ll2+10 );
421 		WMMTX = AllocateFloatMtx( ll1+10, ll2+10 );
422 
423 #if DEBUG
424 		fprintf( stderr, "succeeded\n\n" );
425 #endif
426 
427 		commonAlloc1 = ll1;
428 		commonAlloc2 = ll2;
429 	}
430 	ijp = commonIP;
431 
432 
433 #if 0
434 	for( i=0; i<lgth1; i++ )
435 		fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] );
436 #endif
437 
438 	currentw = w1;
439 	previousw = w2;
440 
441 	match_calc( initverticalw, seq2, seq1, 0, lgth1 );
442 
443 
444 	match_calc( currentw, seq1, seq2, 0, lgth2 );
445 
446 	WMMTX[0][0] = initverticalw[0];
447 
448 	maxinh[0] = initverticalw[0];
449 	for( i=1; i<lgth1+1; i++ )
450 	{
451 		initverticalw[i] += fpenalty;
452 		WMMTX[i][0] = initverticalw[i];
453 		if( maxinh[0] < initverticalw[i] ) maxinh[0] = initverticalw[i];
454 	}
455 	maxinw[0] = currentw[0];
456 	for( j=1; j<lgth2+1; j++ )
457 	{
458 		currentw[j] += fpenalty;
459 		WMMTX[0][j] = currentw[j];
460 		if( maxinw[0] < currentw[j] ) maxinw[0] = currentw[j];
461 	}
462 
463 	for( j=1; j<lgth2+1; ++j )
464 	{
465 		m[j] = currentw[j-1]; mp[j] = 0;
466 	}
467 
468 	lastverticalw[0] = currentw[lgth2-1];
469 
470 	lasti = lgth1+1;
471 
472 	for( i=1; i<lasti; i++ )
473 	{
474 		wtmp = previousw;
475 		previousw = currentw;
476 		currentw = wtmp;
477 
478 		previousw[0] = initverticalw[i-1];
479 
480 		match_calc( currentw, seq1, seq2, i, lgth2 );
481 
482 		currentw[0] = initverticalw[i];
483 
484 		mi = previousw[0]; mpi = 0;
485 
486 		maxinwpt = maxinw + i;
487 		*maxinwpt = currentw[0];
488 
489 		fprintf( stderr, "currentw[0]  = %f, *maxinwpt = %f\n", currentw[0], maxinw[i] );
490 
491 		ijppt = ijp[i] + 1;
492 		mjpt = m + 1;
493 		prept = previousw;
494 		curpt = currentw + 1;
495 		mpjpt = mp + 1;
496 		lastj = lgth2+1;
497 
498 		for( j=1; j<lastj; j++ )
499 		{
500 			wm = *prept;
501 			*ijppt = 0;
502 
503 #if 0
504 			fprintf( stderr, "%5.0f->", wm );
505 #endif
506 			g = mi + fpenalty;
507 #if 0
508 			fprintf( stderr, "%5.0f?", g );
509 #endif
510 			if( g > wm )
511 			{
512 				wm = g;
513 				*ijppt = -( j - mpi );
514 			}
515 			g = *prept;
516 			if( g >= mi )
517 			{
518 				mi = g;
519 				mpi = j-1;
520 			}
521 #if USE_PENALTY_EX
522 			mi += fpenalty_ex;
523 #endif
524 
525 			g = *mjpt + fpenalty;
526 #if 0
527 			fprintf( stderr, "%5.0f?", g );
528 #endif
529 			if( g > wm )
530 			{
531 				wm = g;
532 				*ijppt = +( i - *mpjpt );
533 			}
534 			g = *prept;
535 			if( g >= *mjpt )
536 			{
537 				*mjpt = g;
538 				*mpjpt = i-1;
539 			}
540 #if USE_PENALTY_EX
541 			m[j] += fpenalty_ex;
542 #endif
543 
544 #if 0
545 			fprintf( stderr, "%5.0f ", wm );
546 #endif
547 			*curpt += wm;
548 
549 			WMMTX[i][j] = *curpt;
550 
551 
552 			if( j<lgth2 && *maxinwpt < *curpt ) *maxinwpt = *curpt;
553 			if( j<lgth2 && maxinh[j] < *curpt ) maxinh[j] = *curpt;
554 //			fprintf( stderr, "maxintwpt = %f\n", *maxinwpt );
555 
556 			ijppt++;
557 			mjpt++;
558 			prept++;
559 			mpjpt++;
560 			curpt++;
561 		}
562 		lastverticalw[i] = currentw[lgth2-1];
563 	}
564 
565 	wmmax = -999.9;
566 	for( i=0; i<lgth1; i++ )
567 	{
568 		g = lastverticalw[i];
569 		if( g > wmmax )
570 		{
571 			wmmax = g;
572 			iin = i;
573 			jin = lgth2-1;
574 		}
575 	}
576 	for( j=0; j<lgth2; j++ )
577 	{
578 		g = currentw[j];
579 		if( g > wmmax )
580 		{
581 			wmmax = g;
582 			iin = lgth1-1;
583 			jin = j;
584 		}
585 	}
586 
587 	for( i=0; i<lgth1; i++ )
588 		fprintf( stderr, "maxinw[%d] = %f\n", i, maxinw[i] );
589 	for( j=0; j<lgth2; j++ )
590 		fprintf( stderr, "maxinh[%d] = %f\n", j, maxinh[j] );
591 
592 	fprintf( stderr, "wmmax = %f (%d,%d)\n", wmmax, iin, jin );
593 	if( iin == lgth1 - 1 && jin == lgth2 - 1 )
594 		;
595 	else
596 		wmmax += fpenalty;
597 
598 	fprintf( stderr, "wmmax = %f\n", wmmax );
599 
600 #if 0
601 	for( i=0; i<lgth1; i++ )
602 	{
603 		for( j=0; j<lgth2; j++ )
604 		{
605 			fprintf( stderr, "% 10.2f ", WMMTX[i][j] );
606 		}
607 		fprintf( stderr, "\n" );
608 	}
609 #endif
610 
611 	mseq1[0] += lgth1+lgth2;
612 	*mseq1[0] = 0;
613 	mseq2[0] += lgth1+lgth2;
614 	*mseq2[0] = 0;
615 
616 	backdp( WMMTX, wmmax, maxinw, maxinh, lgth1, lgth2, alloclen, w1, w2, initverticalw, m, mp, iin, jin, seq1, seq2, mseq1, mseq2 );
617 
618 	fprintf( stderr, "\n" );
619 #if 1
620 	fprintf( stderr, "\n" );
621 	fprintf( stderr, ">MSres\n%s\n", mseq1[0] );
622 	fprintf( stderr, ">MSres\n%s\n", mseq2[0] );
623 #endif
624 
625 #if 0
626 	for( i=0; i<lgth1; i++ )
627 	{
628 		for( j=0; j<lgth2; j++ )
629 		{
630 			fprintf( stderr, "% 10.2f ", WMMTX[i][j] );
631 		}
632 		fprintf( stderr, "\n" );
633 	}
634 #endif
635 
636 	mseq1[0] = mseq[0];
637 	mseq2[0] = mseq[1];
638 	mseq1[0] += lgth1+lgth2;
639 	*mseq1[0] = 0;
640 	mseq2[0] += lgth1+lgth2;
641 	*mseq2[0] = 0;
642 
643 	Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp );
644 
645 
646 	resultlen = strlen( mseq1[0] );
647 	if( alloclen < resultlen || resultlen > N )
648 	{
649 		fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
650 		ErrorExit( "LENGTH OVER!\n" );
651 	}
652 
653 
654 	strcpy( seq1[0], mseq1[0] );
655 	strcpy( seq2[0], mseq2[0] );
656 #if 1
657 	fprintf( stderr, "\n" );
658 	fprintf( stderr, ">\n%s\n", mseq1[0] );
659 	fprintf( stderr, ">\n%s\n", mseq2[0] );
660 #endif
661 
662 
663 	return( wm );
664 }
665 
666