1 #include "mltaln.h"
2 #include "dp.h"
3 
4 #define DEBUG 0
5 #define DEBUG2 0
6 #define XXXXXXX    0
7 #define USE_PENALTY_EX  1
8 
9 typedef struct _shuryoten
10 {
11 	int i;
12 	int j;
13 	double wm;
14 	struct _shuryoten *next;
15 	struct _shuryoten *prev;
16 } Shuryoten;
17 
18 
19 static int localstop;
20 
compshuryo(Shuryoten * s1_arg,Shuryoten * s2_arg)21 static int compshuryo( Shuryoten *s1_arg, Shuryoten *s2_arg )
22 {
23 	Shuryoten *s1 = (Shuryoten *)s1_arg;
24 	Shuryoten *s2 = (Shuryoten *)s2_arg;
25 	if      ( s1->wm > s2->wm ) return( -1 );
26 	else if ( s1->wm < s2->wm ) return( 1 );
27 	else                        return( 0 );
28 }
29 
match_calc(double * match,char ** s1,char ** s2,int i1,int lgth2)30 static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 )
31 {
32 	int j;
33 
34 	for( j=0; j<lgth2; j++ )
35 		match[j] = amino_dis[(int)(*s1)[i1]][(int)(*s2)[j]];
36 }
37 
gentracking(int ** used,char ** seq1,char ** seq2,char ** mseq1,char ** mseq2,double ** cpmx1,double ** cpmx2,int ** ijpi,int ** ijpj,int * off1pt,int * off2pt,int endi,int endj)38 static double gentracking( int **used,
39 						char **seq1, char **seq2,
40                         char **mseq1, char **mseq2,
41                         double **cpmx1, double **cpmx2,
42                         int **ijpi, int **ijpj, int *off1pt, int *off2pt, int endi, int endj )
43 {
44 	int l, iin, jin, lgth1, lgth2, k, limk;
45 	int ifi=0, jfi=0;
46 //	char gap[] = "-";
47 	char *gap;
48 	gap = newgapstr;
49 	static char *res1 = NULL, *res2 = NULL;
50 	char *mspt1, *mspt2;
51 	if( res1 == NULL )
52 	{
53 		res1 = (char *)calloc( N, sizeof( char ) );
54 		res2 = (char *)calloc( N, sizeof( char ) );
55 	}
56 
57 	lgth1 = strlen( seq1[0] );
58 	lgth2 = strlen( seq2[0] );
59 
60 	mspt1 = res1 + lgth1+lgth2;
61 	*mspt1 = 0;
62 	mspt2 = res2 + lgth1+lgth2;
63 	*mspt2 = 0;
64 	iin = endi; jin = endj;
65 
66 	limk = lgth1+lgth2;
67 	if( used[iin][jin] ) return( -1.0 );
68 	for( k=0; k<=limk; k++ )
69 	{
70 		ifi = ( ijpi[iin][jin] );
71 		jfi = ( ijpj[iin][jin] );
72 
73 		if( used[ifi][jfi] ) return( -1.0 );
74 
75 		l = iin - ifi;
76 		while( --l )
77 		{
78 			*--mspt1 = seq1[0][ifi+l];
79 			*--mspt2 = *gap;
80 			k++;
81 		}
82 		l= jin - jfi;
83 		while( --l )
84 		{
85 			*--mspt1 = *gap;
86 			*--mspt2 = seq2[0][jfi+l];
87 			k++;
88 		}
89 
90 		if( iin <= 0 || jin <= 0 ) break;
91 		*--mspt1 = seq1[0][ifi];
92 		*--mspt2 = seq2[0][jfi];
93 		if( ijpi[ifi][jfi] == localstop ) break;
94 		if( ijpj[ifi][jfi] == localstop ) break;
95 		k++;
96 		iin = ifi; jin = jfi;
97 	}
98 	if( ifi == -1 ) *off1pt = 0; else *off1pt = ifi;
99 	if( jfi == -1 ) *off2pt = 0; else *off2pt = jfi;
100 
101 //	fprintf( stderr, "ifn = %d, jfn = %d\n", ifi, jfi );
102 
103 	iin = endi; jin = endj;
104 	limk = lgth1+lgth2;
105 	for( k=0; k<=limk; k++ )
106 	{
107 		ifi = ( ijpi[iin][jin] );
108 		jfi = ( ijpj[iin][jin] );
109 
110 		used[ifi][jfi] = 1;
111 		if( iin <= 0 || jin <= 0 ) break;
112 		if( ijpi[ifi][jfi] == localstop ) break;
113 		if( ijpj[ifi][jfi] == localstop ) break;
114 
115 		k++;
116 		iin = ifi; jin = jfi;
117 	}
118 
119 
120 	strcpy( mseq1[0], mspt1 );
121 	strcpy( mseq2[0], mspt2 );
122 
123 	fprintf( stderr, "mseq1=%s\nmseq2=%s\n", mspt1, mspt2 );
124 
125 	return( 0.0 );
126 }
127 
128 
suboptalign11(char ** seq1,char ** seq2,int alloclen,int * off1pt,int * off2pt,LocalHom * lhmpt)129 double suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt )
130 /* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
131 {
132 	int k;
133 	static int **used;
134 	register int i, j;
135 	int lasti, lastj;                      /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
136 	int lgth1, lgth2;
137 	int resultlen;
138 	double wm = 0.0;   // by D.Mathog,
139 	double g;
140 	double *currentw, *previousw;
141 #if 1
142 	double *wtmp;
143 	int *ijpipt;
144 	int *ijpjpt;
145 	double *mjpt, *Mjpt, *prept, *curpt;
146 	int *mpjpt, *Mpjpt;
147 #endif
148 	static double mi, *m;
149 	static double Mi, *largeM;
150 	static int **ijpi;
151 	static int **ijpj;
152 	static int mpi, *mp;
153 	static int Mpi, *Mp;
154 	static double *w1, *w2;
155 //	static double *match;
156 	static double *initverticalw;    /* kufuu sureba iranai */
157 	static double *lastverticalw;    /* kufuu sureba iranai */
158 	static char **mseq1;
159 	static char **mseq2;
160 	static double **cpmx1;
161 	static double **cpmx2;
162 	static int **intwork;
163 	static double **doublework;
164 	static int orlgth1 = 0, orlgth2 = 0;
165 	double maxwm;
166 	double tbk;
167 	int tbki, tbkj;
168 	int endali, endalj;
169 //	double localthr = 0.0;
170 //	double localthr2 = 0.0;
171 	double fpenalty = (double)penalty;
172 	double fpenalty_OP = (double)penalty_OP;
173 	double fpenalty_ex = (double)penalty_ex;
174 //	double fpenalty_EX = (double)penalty_EX;
175 	double foffset = (double)offset;
176 	double localthr = -foffset;
177 	double localthr2 = -foffset;
178 	static Shuryoten *shuryo = NULL;
179 	int numshuryo;
180 	double minshuryowm = 0.0; // by D.Mathog
181 	int minshuryopos = 0; // by D.Mathog
182 	double resf;
183 
184 
185 //	fprintf( stderr, "@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty );
186 
187 	fprintf( stderr, "in suboptalign11\n" );
188 	if( !shuryo )
189 	{
190 		shuryo = (Shuryoten *)calloc( 100, sizeof( Shuryoten ) );
191 	}
192 	for( i=0; i<100; i++ )
193 	{
194 		shuryo[i].i = -1;
195 		shuryo[i].j = -1;
196 		shuryo[i].wm = 0.0;
197 	}
198 	numshuryo = 0;
199 
200 	if( orlgth1 == 0 )
201 	{
202 	}
203 
204 
205 	lgth1 = strlen( seq1[0] );
206 	lgth2 = strlen( seq2[0] );
207 
208 	fprintf( stderr, "in suboptalign11 step 1\n" );
209 
210 	if( lgth1 > orlgth1 || lgth2 > orlgth2 )
211 	{
212 		int ll1, ll2;
213 
214 	fprintf( stderr, "in suboptalign11 step 1.3\n" );
215 		if( orlgth1 > 0 && orlgth2 > 0 )
216 		{
217 	fprintf( stderr, "in suboptalign11 step 1.4\n" );
218 			FreeFloatVec( w1 );
219 			FreeFloatVec( w2 );
220 //			FreeFloatVec( match );
221 			FreeFloatVec( initverticalw );
222 			FreeFloatVec( lastverticalw );
223 	fprintf( stderr, "in suboptalign11 step 1.5\n" );
224 
225 			FreeFloatVec( m );
226 			FreeIntVec( mp );
227 			FreeFloatVec( largeM );
228 			FreeIntVec( Mp );
229 	fprintf( stderr, "in suboptalign11 step 1.6\n" );
230 
231 
232 			FreeFloatMtx( cpmx1 );
233 			FreeFloatMtx( cpmx2 );
234 
235 	fprintf( stderr, "in suboptalign11 step 1.7\n" );
236 			FreeFloatMtx( doublework );
237 			FreeIntMtx( intwork );
238 		}
239 
240 		ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
241 		ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
242 
243 #if DEBUG
244 		fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
245 #endif
246 
247 		w1 = AllocateFloatVec( ll2+2 );
248 		w2 = AllocateFloatVec( ll2+2 );
249 //		match = AllocateFloatVec( ll2+2 );
250 
251 		initverticalw = AllocateFloatVec( ll1+2 );
252 		lastverticalw = AllocateFloatVec( ll1+2 );
253 
254 		m = AllocateFloatVec( ll2+2 );
255 		mp = AllocateIntVec( ll2+2 );
256 		largeM = AllocateFloatVec( ll2+2 );
257 		Mp = AllocateIntVec( ll2+2 );
258 
259 		cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
260 		cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
261 
262 		doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
263 		intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
264 
265 		mseq1 = AllocateCharMtx( njob, ll1+ll2 );
266 		mseq2 = AllocateCharMtx( njob, ll1+ll2 );
267 
268 #if DEBUG
269 		fprintf( stderr, "succeeded\n" );
270 #endif
271 
272 		orlgth1 = ll1 - 100;
273 		orlgth2 = ll2 - 100;
274 	}
275 	fprintf( stderr, "in suboptalign11 step 1.6\n" );
276 
277 
278 
279 	fprintf( stderr, "in suboptalign11 step 2\n" );
280 
281 	if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
282 	{
283 		int ll1, ll2;
284 
285 		if( commonAlloc1 && commonAlloc2 )
286 		{
287 			FreeIntMtx( commonIP );
288 			FreeIntMtx( commonJP );
289 			FreeIntMtx( used );
290 		}
291 
292 		ll1 = MAX( orlgth1, commonAlloc1 );
293 		ll2 = MAX( orlgth2, commonAlloc2 );
294 
295 #if DEBUG
296 		fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
297 #endif
298 
299 		used = AllocateIntMtx( ll1+10, ll2+10 );
300 		commonIP = AllocateIntMtx( ll1+10, ll2+10 );
301 		commonJP = AllocateIntMtx( ll1+10, ll2+10 );
302 
303 #if DEBUG
304 		fprintf( stderr, "succeeded\n\n" );
305 #endif
306 
307 		commonAlloc1 = ll1;
308 		commonAlloc2 = ll2;
309 	}
310 	ijpi = commonIP;
311 	ijpj = commonJP;
312 
313 
314 #if 0
315 	for( i=0; i<lgth1; i++ )
316 		fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] );
317 #endif
318 
319 	fprintf( stderr, "in suboptalign11 step 3\n" );
320 	currentw = w1;
321 	previousw = w2;
322 
323 	match_calc( initverticalw, seq2, seq1, 0, lgth1 );
324 
325 	match_calc( currentw, seq1, seq2, 0, lgth2 );
326 
327 
328 	lasti = lgth2+1;
329 	for( j=1; j<lasti; ++j )
330 	{
331 		m[j] = currentw[j-1]; mp[j] = 0;
332 		largeM[j] = currentw[j-1]; Mp[j] = 0;
333 	}
334 
335 	lastverticalw[0] = currentw[lgth2-1];
336 
337 
338 #if 0
339 fprintf( stderr, "currentw = \n" );
340 for( i=0; i<lgth1+1; i++ )
341 {
342 	fprintf( stderr, "%5.2f ", currentw[i] );
343 }
344 fprintf( stderr, "\n" );
345 fprintf( stderr, "initverticalw = \n" );
346 for( i=0; i<lgth2+1; i++ )
347 {
348 	fprintf( stderr, "%5.2f ", initverticalw[i] );
349 }
350 fprintf( stderr, "\n" );
351 #endif
352 #if DEBUG2
353 	fprintf( stderr, "\n" );
354 	fprintf( stderr, "       " );
355 	for( j=0; j<lgth2+1; j++ )
356 		fprintf( stderr, "%c     ", seq2[0][j] );
357 	fprintf( stderr, "\n" );
358 #endif
359 
360 	localstop = lgth1+lgth2+1;
361 	maxwm = -999.9;
362 	endali = endalj = 0;
363 #if DEBUG2
364 	fprintf( stderr, "\n" );
365 	fprintf( stderr, "%c   ", seq1[0][0] );
366 
367 	for( j=0; j<lgth2+1; j++ )
368 		fprintf( stderr, "%5.0f ", currentw[j] );
369 	fprintf( stderr, "\n" );
370 #endif
371 
372 	lasti = lgth1+1;
373 	for( i=1; i<lasti; i++ )
374 	{
375 		wtmp = previousw;
376 		previousw = currentw;
377 		currentw = wtmp;
378 
379 		previousw[0] = initverticalw[i-1];
380 
381 		match_calc( currentw, seq1, seq2, i, lgth2 );
382 #if DEBUG2
383 		fprintf( stderr, "%c   ", seq1[0][i] );
384 		fprintf( stderr, "%5.0f ", currentw[0] );
385 #endif
386 
387 #if XXXXXXX
388 fprintf( stderr, "\n" );
389 fprintf( stderr, "i=%d\n", i );
390 fprintf( stderr, "currentw = \n" );
391 for( j=0; j<lgth2; j++ )
392 {
393 	fprintf( stderr, "%5.2f ", currentw[j] );
394 }
395 fprintf( stderr, "\n" );
396 #endif
397 #if XXXXXXX
398 fprintf( stderr, "\n" );
399 fprintf( stderr, "i=%d\n", i );
400 fprintf( stderr, "currentw = \n" );
401 for( j=0; j<lgth2; j++ )
402 {
403 	fprintf( stderr, "%5.2f ", currentw[j] );
404 }
405 fprintf( stderr, "\n" );
406 #endif
407 		currentw[0] = initverticalw[i];
408 
409 		mi = previousw[0]; mpi = 0;
410 		Mi = previousw[0]; Mpi = 0;
411 
412 #if 0
413 		if( mi < localthr ) mi = localthr2;
414 #endif
415 
416 		ijpipt = ijpi[i] + 1;
417 		ijpjpt = ijpj[i] + 1;
418 		mjpt = m + 1;
419 		Mjpt = largeM + 1;
420 		prept = previousw;
421 		curpt = currentw + 1;
422 		mpjpt = mp + 1;
423 		Mpjpt = Mp + 1;
424 		tbk = -999999.9;
425 		tbki = 0;
426 		tbkj = 0;
427 		lastj = lgth2+1;
428 		for( j=1; j<lastj; j++ )
429 		{
430 			wm = *prept;
431 			*ijpipt = i-1;
432 			*ijpjpt = j-1;
433 
434 
435 //			fprintf( stderr, "i,j=%d,%d %c-%c\n", i, j, seq1[0][i], seq2[0][j] );
436 //			fprintf( stderr, "wm=%f\n", wm );
437 #if 0
438 			fprintf( stderr, "%5.0f->", wm );
439 #endif
440 			g = mi + fpenalty;
441 #if 0
442 			fprintf( stderr, "%5.0f?", g );
443 #endif
444 			if( g > wm )
445 			{
446 				wm = g;
447 //				*ijpipt = i - 1;
448 				*ijpjpt = mpi;
449 			}
450 			g = *prept;
451 			if( g > mi )
452 			{
453 				mi = g;
454 				mpi = j-1;
455 			}
456 
457 #if USE_PENALTY_EX
458 			mi += fpenalty_ex;
459 #endif
460 
461 #if 0
462 			fprintf( stderr, "%5.0f->", wm );
463 #endif
464 			g = *mjpt + fpenalty;
465 #if 0
466 			fprintf( stderr, "m%5.0f?", g );
467 #endif
468 			if( g > wm )
469 			{
470 				wm = g;
471 				*ijpipt = *mpjpt;
472 //				*ijpjpt = j - 1;
473 			}
474 			g = *prept;
475 			if( g > *mjpt )
476 			{
477 				*mjpt = g;
478 				*mpjpt = i-1;
479 			}
480 #if USE_PENALTY_EX
481 			*mjpt += fpenalty_ex;
482 #endif
483 
484 
485 			g =  tbk + fpenalty_OP;
486 //			g =  tbk;
487 			if( g > wm )
488 			{
489 				wm = g;
490 				*ijpipt = tbki;
491 				*ijpjpt = tbkj;
492 //				fprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\n", i, j, *ijpipt, *ijpjpt );
493 			}
494 			g = Mi;
495 			if( g > tbk )
496 			{
497 				tbk = g;
498 				tbki = i-1;
499 				tbkj = Mpi;
500 			}
501 			g = *Mjpt;
502 			if( g > tbk )
503 			{
504 				tbk = g;
505 				tbki = *Mpjpt;
506 				tbkj = j-1;
507 			}
508 //			tbk += fpenalty_EX;// + foffset;
509 
510 			g = *prept;
511 			if( g > *Mjpt )
512 			{
513 				*Mjpt = g;
514 				*Mpjpt = i-1;
515 			}
516 //			*Mjpt += fpenalty_EX;// + foffset;
517 
518 			g = *prept;
519 			if( g > Mi )
520 			{
521 				Mi = g;
522 				Mpi = j-1;
523 			}
524 //			Mi += fpenalty_EX;// + foffset;
525 
526 
527 //			fprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt );
528 //			fprintf( stderr, "ijp = %c,%c\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] );
529 
530 
531 			if( maxwm < wm )
532 			{
533 				maxwm = wm;
534 				endali = i;
535 				endalj = j;
536 			}
537 
538 #if 1
539 			if( numshuryo < 100 )
540 			{
541 				shuryo[numshuryo].i = i;
542 				shuryo[numshuryo].j = j;
543 				shuryo[numshuryo].wm = wm;
544 
545 				if( minshuryowm > wm )
546 				{
547 					 minshuryowm = wm;
548 					 minshuryopos = numshuryo;
549 				}
550 				numshuryo++;
551 			}
552 			else
553 			{
554 				if( wm > minshuryowm )
555 				{
556 					shuryo[minshuryopos].i = i;
557 					shuryo[minshuryopos].j = j;
558 					shuryo[minshuryopos].wm = wm;
559 					minshuryowm = wm;
560 					for( k=0; k<100; k++ )    // muda
561 					{
562 						if( shuryo[k].wm < minshuryowm )
563 						{
564 							minshuryowm = shuryo[k].wm;
565 							minshuryopos = k;
566 							break;
567 						}
568 					}
569 				}
570 			}
571 #endif
572 #if 1
573 			if( wm < localthr )
574 			{
575 //				fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt );
576 				*ijpipt = localstop;
577 //				*ijpjpt = localstop;
578 				wm = localthr2;
579 			}
580 #endif
581 #if 0
582 			fprintf( stderr, "%5.0f ", *curpt );
583 #endif
584 #if DEBUG2
585 			fprintf( stderr, "%5.0f ", wm );
586 //			fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop );
587 #endif
588 
589 			*curpt += wm;
590 			ijpipt++;
591 			ijpjpt++;
592 			mjpt++;
593 			Mjpt++;
594 			prept++;
595 			mpjpt++;
596 			Mpjpt++;
597 			curpt++;
598 		}
599 #if DEBUG2
600 		fprintf( stderr, "\n" );
601 #endif
602 
603 		lastverticalw[i] = currentw[lgth2-1];
604 	}
605 
606 	for( k=0; k<100; k++ )
607 	{
608 		fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );
609 	}
610 
611 
612 #if 1
613 	fprintf( stderr, "maxwm = %f\n", maxwm );
614 	fprintf( stderr, "endali = %d\n", endali );
615 	fprintf( stderr, "endalj = %d\n", endalj );
616 #endif
617 
618 	qsort( shuryo, 100, sizeof( Shuryoten ), (int (*)())compshuryo );
619 	for( k=0; k<100; k++ )
620 	{
621 		fprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );
622 	}
623 
624 
625 	lasti = lgth1+1;
626     for( i=0; i<lasti; i++ )
627     {
628         ijpi[i][0] = localstop;
629         ijpj[i][0] = localstop;
630     }
631 	lastj = lgth2+1;
632     for( j=0; j<lastj; j++ )
633     {
634         ijpi[0][j] = localstop;
635         ijpj[0][j] = localstop;
636     }
637 
638 	for( i=0; i<lasti; i++ ) for( j=0; j<lastj; j++ ) used[i][j] = 0;
639 
640 	for( k=0; k<numshuryo; k++ )
641 	{
642 		if( shuryo[k].wm < shuryo[0].wm * 0.3 ) break;
643 		fprintf( stderr, "k=%d, shuryo[k].i,j,wm=%d,%d,%f go\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );
644 		resf = gentracking( used, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, ijpj, off1pt, off2pt, shuryo[k].i, shuryo[k].j );
645 		if( resf == -1.0 ) continue;
646 		putlocalhom3( mseq1[0], mseq2[0], lhmpt, *off1pt, *off2pt, (int)shuryo[k].wm, strlen( mseq1[0] ) );
647 #if 0
648 		fprintf( stderr, "\n" );
649 		fprintf( stderr, ">\n%s\n", mseq1[0] );
650 		fprintf( stderr, ">\n%s\n", mseq2[0] );
651 #endif
652 	}
653 	for( i=0; i<20; i++ )
654 	{
655 		for( j=0; j<20; j++ )
656 		{
657 			fprintf( stderr, "%2d ", used[i][j] );
658 		}
659 		fprintf( stderr, "\n" );
660 	}
661 
662 
663 //	fprintf( stderr, "### impmatch = %f\n", *impmatch );
664 
665 	resultlen = strlen( mseq1[0] );
666 	if( alloclen < resultlen || resultlen > N )
667 	{
668 		fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
669 		ErrorExit( "LENGTH OVER!\n" );
670 	}
671 
672 
673 
674 
675 
676 	return( wm );
677 }
678 
679