1 2%{ 3 4#include "sequence.h" 5#include "compmat.h" 6#include "glib.h" 7#include "hsp.h" 8 9#define UNFEASIBLY_LARGE_SCORE 1000000 10 11 12 13%} 14 15 16struct TopScoreManager 17int * score 18int length 19int current_pos 20int worst_score 21int worst_position 22 23struct QuerySeqHSP 24Sequence * query 25int ** score !link 26int * self_score 27int len 28 29struct HSPmanager 30GHashTable * target_hash !link 31Sequence * query 32CompMat * mat 33int drop_off 34int min_score !def=40 35TopScoreManager * tsm; 36QuerySeqHSP * qs 37HSPCache * cache 38int hsp_count 39 40 41struct LineariseHSPPara 42int max_size 43int min_score 44int width 45int tail 46int verbosity 47 48 49 50%{ 51#include "hsphandler.h" 52 53 54%func 55Makes a new topscore manager of a specific length 56%% 57TopScoreManager * new_TopScoreManager(int length) 58{ 59 60 61 TopScoreManager * tsm; 62 63 tsm = TopScoreManager_alloc(); 64 tsm->score = calloc(length,sizeof(int)); 65 tsm->current_pos = 0; 66 tsm->length = length; 67 68 tsm->worst_score = 0; 69 tsm->worst_position = -1; 70 71 return tsm; 72} 73 74 75%func 76Adds a top score 77%% 78boolean add_score_TopScoreManager(TopScoreManager * tsm,int score) 79{ 80 int i; 81 int temp_score; 82 int temp_pos; 83 84/* fprintf(stderr,"Looking at %d\n",score);*/ 85 86 if( tsm->current_pos < tsm->length ) { 87 tsm->score[tsm->current_pos++] = score; 88 return TRUE; 89 } 90 91 92 if( tsm->worst_position == -1 ) { 93 /* fprintf(stderr,"Recalculating...\n"); */ 94 /* need to recalculate top score */ 95 for(i=1,temp_score = tsm->score[0],temp_pos = 0;i<tsm->length;i++) { 96 if( temp_score > tsm->score[i] ) { 97 temp_score = tsm->score[i]; 98 temp_pos = i; 99 } 100 } 101 tsm->worst_score = temp_score; 102 tsm->worst_position = temp_pos; 103 } 104 105 if( score < tsm->worst_score ) { 106/* fprintf(stderr,"Ignoring... %d vs %d\n",score,tsm->worst_score); */ 107 return FALSE; 108 } 109 110/* fprintf(stderr,"Resetting...\n"); */ 111 tsm->score[tsm->worst_position] = score; 112 tsm->worst_position = -1; 113 114 return TRUE; 115} 116 117%func 118A simpler truncation method, using diagonals 119%% 120LinearHSPmanager * truncated_simple_LinearHSPmanager(LinearHSPmanager * lm,LineariseHSPPara * para) 121{ 122 LinearHSPmanager * out; 123 int i; 124 int j; 125 int new_score; 126 127 int diag_one; 128 int diag_two; 129 130 int query_position[500]; 131 int k; 132 int aa_count; 133 int query_offset; 134/* 135 struct timeval t1; 136 struct timeval t2; 137 struct timeval t3; 138*/ 139 140/* gettimeofday(&t1,NULL);*/ 141 142 if( VERBOSITY_CHECK(2,para->verbosity) ) { 143 info("Input linear management of %d entries\n",lm->len); 144 } 145 146 147 for(i=0;i<lm->len;i++) { 148 /* rescore this set wrt to top diagonal */ 149 sort_HSPset_by_score(lm->set[i]); 150 new_score = lm->set[i]->hsp[0]->score; 151 diag_one = lm->set[i]->hsp[0]->query_start - lm->set[i]->hsp[0]->target_start; 152 153 if( VERBOSITY_CHECK(5,para->verbosity) ) { 154 info("Looking at %s with starting hsp score of %d\n",lm->set[i]->hsp[0]->target->name,new_score); 155 } 156 157 query_offset = lm->set[i]->hsp[0]->query_start - 200; 158 159 for(k=0;k<500;k++) { 160 query_position[k] = 0; 161 } 162 for(k=0;k<lm->set[i]->hsp[0]->length && lm->set[i]->hsp[0]->query_start - query_offset+k < 500;k++) { 163 query_position[lm->set[i]->hsp[0]->query_start - query_offset+k] = 1; 164 } 165 166 167 for(j=1;j<lm->set[i]->len;j++) { 168 169 if( lm->set[i]->hsp[j]->score < para->min_score ) { 170 if( VERBOSITY_CHECK(8,para->verbosity) ) { 171 info(" ...not accepting hsp %d,%d due to min score",lm->set[i]->hsp[j]->query_start,lm->set[i]->hsp[j]->target_start); 172 } 173 174 break; 175 } 176 177 diag_two = lm->set[i]->hsp[j]->query_start - lm->set[i]->hsp[j]->target_start; 178 if( abs(diag_one - diag_two) < para->width && abs(lm->set[i]->hsp[0]->query_start - lm->set[i]->hsp[j]->query_start) < para->tail) { 179 180 /* this is now in the right area */ 181 /* but we need to test for overlap on existing cases */ 182 183 184 /* fprintf(stderr," .... looking at hsp overlap\n");*/ 185 186 for(aa_count = 0,k=0;k<lm->set[i]->hsp[j]->length && lm->set[i]->hsp[j]->query_start - query_offset+k < 500;k++) { 187 if( query_position[lm->set[i]->hsp[j]->query_start - query_offset+k] == 1 ) { 188 aa_count++; 189 } 190 } 191 192 193 /* if we have more than 15% of this hsp "accounted for" move on */ 194 195 if(aa_count/(double)lm->set[i]->hsp[j]->length > 0.15 ) { 196 continue; 197 } 198 199 /* fprintf(stderr," .... updating HSP overlap\n");*/ 200 201 /* now set these positions as used */ 202 203 for(aa_count = 0,k=0;k<lm->set[i]->hsp[j]->length && lm->set[i]->hsp[j]->query_start - query_offset+k < 500;k++) { 204 query_position[lm->set[i]->hsp[j]->query_start - query_offset+k] = 1; 205 } 206 207 new_score += lm->set[i]->hsp[j]->score; 208 if( VERBOSITY_CHECK(5,para->verbosity) ) { 209 info(" ..accepting hsp on %s %d,%d new score %d",lm->set[i]->hsp[j]->target->name,lm->set[i]->hsp[j]->query_start,lm->set[i]->hsp[j]->target_start,new_score); 210 } 211 } else { 212 if( VERBOSITY_CHECK(5,para->verbosity) ) { 213 info(" ...not accepting hsp %d,%d due to diagonal width",lm->set[i]->hsp[j]->query_start,lm->set[i]->hsp[j]->target_start); 214 215 } 216 217 } 218 } 219 220 if( VERBOSITY_CHECK(3,para->verbosity) ) { 221 info("Looking at %s with final score of %d",lm->set[i]->hsp[0]->target->name,new_score); 222 } 223 224 lm->set[i]->score = new_score; 225 } 226 227 /* gettimeofday(&t2,NULL); */ 228 229 if( VERBOSITY_CHECK(2,para->verbosity) ) { 230 info("sorting %d items",lm->len); 231 } 232 233 /*qsort(lm->set,lm->len,sizeof(HSPset*),compare_HSPset_score_qsort);*/ 234 235 sort_LinearHSPmanager(lm,compare_HSPset_score); 236 237 238 /* gettimeofday(&t3,NULL); 239 240 info("truncation clock point: rescoring %f : sorting %f", 241 t2.tv_sec - t1.tv_sec + ((t2.tv_usec - t1.tv_usec) * 1e-6), 242 t3.tv_sec - t2.tv_sec + ((t2.tv_usec - t2.tv_usec) * 1e-6) 243 ); 244 245 */ 246 247 out = LinearHSPmanager_alloc_std(); 248 out->mat = hard_link_CompMat(lm->mat); 249 250 251 for(i=0;i<lm->len;i++) { 252 if( VERBOSITY_CHECK(5,para->verbosity) ) { 253 info("Accepting hit %s position %d with score %d",lm->set[i]->hsp[0]->target->name,i,lm->set[i]->score); 254 } 255 256 add_LinearHSPmanager(out,hard_link_HSPset(lm->set[i])); 257 if( i > para->max_size ) { 258 break; 259 } 260 } 261 262 263 return out; 264} 265 266 267 268%func 269Makes a truncated linear set 270%% 271LinearHSPmanager * truncated_LinearHSPmanager(LinearHSPmanager * lm,int max_size,int min_score,int width,int tail) 272{ 273 274 LinearHSPmanager * out; 275 int i,j; 276 int worst_score = UNFEASIBLY_LARGE_SCORE; 277 int worst_j = -1; 278 HSPset ** worst; 279 HSPset * trial; 280 281 assert(max_size > 0); 282 283 out = LinearHSPmanager_alloc_len(max_size+1); 284 out->mat = hard_link_CompMat(lm->mat); 285 286 sort_LinearHSPmanager(lm,compare_HSPset_score); 287 288 for(i=0;i<lm->len;i++) { 289 /* we chew up into max_size positions */ 290 if( out->len < max_size ) { 291 trial = new_consistent_HSPset(lm->set[i],min_score,width,tail); 292 if( trial->len == 0 ) { 293 continue; 294 } 295 296 add_LinearHSPmanager(out,trial); 297 } else { 298 /* if have not found worst, scan to find worst */ 299 if( worst_score == UNFEASIBLY_LARGE_SCORE ) { 300 for(j=0;j<out->len;j++) { 301 if( out->set[j]->score < worst_score ) { 302 worst_score = out->set[j]->score; 303 worst = out->set+j; 304 worst_j = j; 305 } 306 } 307 } 308 /* worst_score is now the score */ 309 if( lm->set[i]->score < worst_score ) { 310 break; /* otta here - we don't need to look at any more! */ 311 } 312 trial = new_consistent_HSPset(lm->set[i],min_score,width,tail); 313 if( trial->len == 0 ) { 314 continue; 315 } 316 317 318 if( trial->score > worst_score ) { 319 free_HSPset(out->set[worst_j]); 320 out->set[worst_j] = trial; 321 worst_score = UNFEASIBLY_LARGE_SCORE; 322 } else { 323 free_HSPset(trial); 324 } 325 } 326 } 327 328 return out; 329 330} 331 332%func 333Makes an HSP set via heuristics 334to deal with low complexity regions 335%% 336HSPset * new_consistent_HSPset(HSPset * set,int min_score,int width,int tail) 337{ 338 HSPset * out; 339 int i,j; 340 int diag_a; 341 int diag_b; 342 int eaten; 343 344 /* fprintf(stderr,"Entering consistency for %s with %d\n",set->hsp[0]->target->name,set->len); */ 345 out = HSPset_alloc_std(); 346 347 sort_HSPset_by_score(set); 348 349 350 add_HSPset(out,hard_link_HSP(set->hsp[0])); 351 out->score = set->hsp[0]->score; 352 353 for(i=1;i<set->len;i++) { 354 /* check against exisiting HSPs. If fits, add into big set */ 355 eaten = 0; 356 357 if( set->hsp[i]->score < min_score ) { 358 continue; 359 } 360 361 for(j=0;j<out->len;j++) { 362 diag_a = set->hsp[i]->query_start - set->hsp[i]->target_start; 363 diag_b = out->hsp[j]->query_start - out->hsp[j]->target_start; 364 if( abs(diag_a - diag_b) > 2 * width ) { 365 continue; /* does not match */ 366 } 367 368 eaten = 1; 369 add_HSPset(out,hard_link_HSP(set->hsp[i])); 370 out->score += set->hsp[i]->score; 371 break; 372 } 373 } 374 375 376 377 return out; 378 379} 380 381%func 382New, simpler LinearHSPmanager with diagonal heuristics 383%% 384LinearHSPmanager * new_LinearHSPmanager_simple_heuristic(HSPmanager * hspm,LineariseHSPPara * para) 385{ 386 int i; 387 int j; 388 LinearHSPmanager * temp; 389 LinearHSPmanager * out; 390 391#ifdef LINUX_TIMER 392 struct timeval t1; 393 struct timeval t2; 394 struct timeval t3; 395 396 if( VERBOSITY_CHECK(1,para->verbosity) ) { 397 gettimeofday(&t1,NULL); 398 } 399 400#endif 401 402 temp = new_LinearHSPmanager_truncate_on_score(hspm); 403 404 if( VERBOSITY_CHECK(2,para->verbosity) ) { 405 info("Have got linear hsp manager with %d entries",temp->len); 406 } 407 408 409#ifdef LINUX_TIMER 410 if( VERBOSITY_CHECK(1,para->verbosity) ) { 411 gettimeofday(&t2,NULL); 412 } 413#endif 414 415 416 out = truncated_simple_LinearHSPmanager(temp,para); 417 418 if( VERBOSITY_CHECK(2,para->verbosity) ) { 419 info("Now got linear hsp manager with %d entries",out->len); 420 } 421 422#ifdef LINUX_TIMER 423 424 425 if( VERBOSITY_CHECK(1,para->verbosity) ) { 426 gettimeofday(&t3,NULL); 427 info("Sort breakdown: Conversion %f : Truncation %f", 428 t2.tv_sec - t1.tv_sec + ((t2.tv_usec - t1.tv_usec) * 1e-6), 429 t3.tv_sec - t2.tv_sec + ((t2.tv_usec - t2.tv_usec) * 1e-6) 430 ); 431 } 432 433#endif 434 435 /* 436 for(i=0;i<out->len;i++) { 437 fprintf(stderr,"%d, got score %d, top score %d %s\n",i,out->set[i]->score,out->set[i]->hsp[0]->score,out->set[i]->hsp[0]->target->name); 438 for(j=0;j<out->set[i]->len;j++) { 439 fprintf(stderr," HSP %d,%d score %d\n",out->set[i]->hsp[j]->query_start, 440 out->set[i]->hsp[j]->target_start, 441 out->set[i]->hsp[j]->score); 442 443 444 } 445 } 446 447 */ 448 449 info("Currently not free'ing temporary list"); 450 451 return out; 452} 453 454 455%func 456Builds a new LinearHSPmanager from hash based with heuristics 457%% 458LinearHSPmanager * new_LinearHSPmanager_heuristic_max(HSPmanager * hspm,int max_size) 459{ 460 LinearHSPmanager * temp; 461 LinearHSPmanager * out; 462 463 assert(hspm); 464 assert(max_size > 0); 465 466 temp = new_LinearHSPmanager_truncate_on_score(hspm); 467 468 out = truncated_LinearHSPmanager(temp,max_size,30,30,40); 469 470 /* free_LinearHSPmanager(temp); */ 471 472 return out; 473} 474 475%func 476Builds a LinearHSPmanager from a hash based HSP manager, using worst score truncation 477%% 478LinearHSPmanager * new_LinearHSPmanager_truncate_on_score(HSPmanager * hspm) 479{ 480 LinearHSPmanager * out; 481 482 483 out = LinearHSPmanager_alloc_std(); 484 out->mat = hard_link_CompMat(hspm->mat); 485 486 out->worst_hsp_score = hspm->tsm->worst_score; 487 488 /* for(i=0;i<hspm->tsm->length;i++) { 489 fprintf(stderr,"At position %d got score %d\n",i,hspm->tsm->score[i]); 490 } 491 */ 492 493 494 495 /* fprintf(stderr,"Before management, we have %d, worst score is %d\n",g_hash_table_size(hspm->target_hash),out->worst_hsp_score); */ 496 497 498 499 500 g_hash_table_foreach(hspm->target_hash,linearise_HSPset_truncate_on_score,out); 501 502/* sort_LinearHSPmanager(out,compare_HSPset_score); */ 503 504 return out; 505 506} 507 508 509%func 510Builds a LinearHSPmanager from a hash based HSP manager 511%% 512LinearHSPmanager * new_LinearHSPmanager_flat(HSPmanager * hspm) 513{ 514 LinearHSPmanager * out; 515 516 out = LinearHSPmanager_alloc_std(); 517 out->mat = hard_link_CompMat(hspm->mat); 518 519 520 g_hash_table_foreach(hspm->target_hash,linearise_HSPset_flat,out); 521 522 sort_LinearHSPmanager(out,compare_HSPset_score); 523 524 return out; 525 526} 527 528%func 529internal function for remapping HSPs 530%% 531void linearise_HSPset_truncate_on_score(gpointer key,gpointer value,gpointer user_data) 532{ 533 LinearHSPmanager * l = (LinearHSPmanager *) user_data; 534 HSPset * s = (HSPset *) value; 535 536 if( s->len == 1 && s->score < l->worst_hsp_score ) { 537 return; 538 } else { 539 add_LinearHSPmanager(l,hard_link_HSPset(s)); 540 } 541} 542 543 544%func 545internal function for remapping HSPs with score cutoff 546%% 547void linearise_HSPset_flat(gpointer key,gpointer value,gpointer user_data) 548{ 549 LinearHSPmanager * l = (LinearHSPmanager *) user_data; 550 HSPset * s = (HSPset *) value; 551 int i; 552 553 s->score = 0; 554 s->best_score =0; 555 556 557 for(i=0;i<s->len;i++) { 558 s->score += s->hsp[i]->score; 559 if( s->hsp[i]->score > s->best_score ) { 560 s->best_score = s->hsp[i]->score; 561 } 562 } 563 564 add_LinearHSPmanager(l,hard_link_HSPset(s)); 565 566} 567 568%func 569internal function for remapping HSPs 570%% 571void linearise_HSPset_consistent(gpointer key,gpointer value,gpointer user_data) 572{ 573 LinearHSPmanager * l = (LinearHSPmanager *) user_data; 574 HSPset * s = (HSPset *) value; 575 HSPset * add; 576 577 add = new_consistent_HSPset(s,l->min_score,l->width,l->tail); 578 if( add->len > 0 ) { 579 add_LinearHSPmanager(l,add); 580 } else { 581 free_HSPset(add); 582 } 583} 584 585 586 587%func 588Builds a new HSPmanager for a target system 589%% 590HSPmanager * new_HSPmanager(Sequence * query,CompMat * mat,int score_drop_off) 591{ 592 HSPmanager * out; 593 594 out = HSPmanager_alloc(); 595 596 out->query = hard_link_Sequence(query); 597 if( mat == NULL ) { 598 out->mat = NULL; 599 } else { 600 out->mat = hard_link_CompMat(mat); 601 } 602 603 604 out->qs = new_QuerySeqHSP(query,mat); 605 /* out->qs = NULL; */ 606 607 out->drop_off = score_drop_off; 608 out->target_hash = g_hash_table_new(g_direct_hash,g_direct_equal); 609 out->tsm = new_TopScoreManager(250); 610 /* out->cache = new_HSPCache(2000);*/ 611 out->cache = NULL; 612 out->min_score = 25; 613 out->hsp_count = 0; 614 615 return out; 616} 617 618%func 619adds a new target pair, irregardless of score 620%% 621int add_pair_HSPmanager(HSPmanager * hspm,Sequence * target,int query_pos,int target_pos) 622{ 623 return add_pair_HSPmanager_score(hspm,target,query_pos,target_pos,-1); 624} 625 626%func 627Adds a new target pair to this HSPmanager for indexing, with a min score 628%% 629int add_pair_HSPmanager_score(HSPmanager * hspm,Sequence * target,int query_pos,int target_pos,int min_score) 630{ 631 HSPset * set; 632 int i; 633 HSP * hsp; 634 boolean has_set = 0; 635 636 /* see if this target is loaded into the manager */ 637 638 if( (set = g_hash_table_lookup(hspm->target_hash,(gpointer)target)) != NULL ) { 639 has_set = 1; 640 } 641 642 /* set is now the HSPset. Ensure this position is not already accounted for 643 in the set */ 644 645 if( has_set == 1 ) { 646 if( set->last_accessed != -1 && ON_HSP_MACRO(set->hsp[set->last_accessed],query_pos,target_pos) ) { 647 return set->hsp[set->last_accessed]->score; 648 } 649 650 for(i=0;i<set->len;i++) { 651 if( ON_HSP_MACRO(set->hsp[i],query_pos,target_pos) == TRUE ) { 652 set->last_accessed = i; 653 return set->hsp[set->last_accessed]->score; 654 } 655 } 656 } 657 658 if( hspm->qs != NULL ) { 659 hsp = new_HSP_QuerySeqHSP(hspm->cache,hspm->qs,target,query_pos,target_pos,hspm->mat,hspm->drop_off,min_score); 660 } else { 661 hsp = new_HSP(hspm->cache,hspm->query,target,query_pos,target_pos,hspm->mat,hspm->drop_off); 662 } 663 664 if( hsp == NULL ) { 665 /* internal min score optimisation */ 666 return 0; 667 } 668 669 670 if( hsp->score < hspm->min_score ) { 671 /* fprintf(stderr,"hsp being lost due to min score %d\n",hsp->score); */ 672 free_HSP(hsp); 673 return 0; 674 } 675 676 hspm->hsp_count++; 677 678 if( has_set == 0 ) { 679 set = HSPset_alloc_std(); 680 g_hash_table_insert(hspm->target_hash,(gpointer)target,set); 681 } 682 683 684 set->score += hsp->score; 685 if( hsp->score > set->best_score ) { 686 set->best_score = hsp->score; 687 add_score_TopScoreManager(hspm->tsm,set->best_score); 688 } 689 690 691 add_HSPset(set,hsp); 692 set->last_accessed = set->len-1; 693 694 return hsp->score; 695} 696 697%func 698Adds a new HSP when all info is known 699%% 700boolean add_new_HSP_HSPmanager(HSPmanager * hspm,Sequence * target,int query_start,int target_start,int length,int score) 701{ 702 HSPset * set; 703 HSP * hsp; 704 705 /* see if this target is loaded into the manager */ 706 707 if( (set = g_hash_table_lookup(hspm->target_hash,(gpointer)target)) == NULL ) { 708 set = HSPset_alloc_std(); 709 g_hash_table_insert(hspm->target_hash,(gpointer)target,set); 710 } 711 712 713 if( hspm->cache != NULL ) { 714 hsp = HSP_alloc_cache(hspm->cache); 715 } else { 716 hsp = HSP_alloc(); 717 } 718 719 hsp->query = hard_link_Sequence(hspm->query); 720 hsp->target= hard_link_Sequence(target); 721 hsp->score = score; 722 hsp->target_start = target_start; 723 hsp->query_start = query_start; 724 hsp->length = length; 725 726 727 add_HSPset(set,hsp); 728 729 return TRUE; 730} 731 732%func 733Frees the HSPsets 734%% 735void free_ghash_HSPsets(gpointer key,gpointer value,gpointer user_data) 736{ 737 int i; 738 HSPset * val = (HSPset *) value; 739 HSPCache * cache = (HSPCache*) user_data; 740 741 if( val->dynamite_hard_link > 1 ) { 742 val->dynamite_hard_link--; 743 return; 744 } 745 746 if( cache != NULL ) { 747 for(i=0;i<val->len;i++) { 748 free_HSP_cache(cache,val->hsp[i]); 749 } 750 val->len = 0; 751 } 752 753 free_HSPset(val); 754} 755 756%func 757Frees the HSPmanager 758%% 759!deconstructor 760HSPmanager * free_HSPmanager(HSPmanager * h) 761{ 762 g_hash_table_foreach(h->target_hash,free_ghash_HSPsets,h->cache); 763 g_hash_table_destroy(h->target_hash); 764 free_CompMat(h->mat); 765 if( h->tsm != NULL ) { 766 free_TopScoreManager(h->tsm); 767 } 768 if( h->qs != NULL ) { 769 free_QuerySeqHSP(h->qs); 770 } 771 772 ckfree(h); 773 return NULL; 774} 775 776 777%func 778builds a new HSP for these sequences 779%% 780HSP * new_HSP_QuerySeqHSP(HSPCache * cache,QuerySeqHSP * query,Sequence * target,int query_pos,int target_pos,CompMat * mat,int score_drop_off,int min_score) 781{ 782 int i,j; 783 int ii,jj; 784 int temp_score; 785 int t; 786 int pause_i,pause_j; 787 int score = 0; 788 HSP * out = NULL; 789 const char * q_seq; 790 const char * t_seq; 791 const int * self_score; 792 793 int overall_score; 794 int query_start; 795 int target_start; 796 797 798 /* we count the start position twice */ 799 overall_score = -(query->score[query_pos][target->seq[target_pos]-'A']); 800 801 q_seq = query->query->seq; 802 t_seq = target->seq; 803 self_score = query->self_score; 804 805 pause_i = i = query_pos; 806 pause_j = j = target_pos; 807 808 /* go upstream first */ 809 810 for(score=0;i >= 0 && j >= 0;i--,j--) { 811 812 /* optimise identical matches */ 813 if( toupper(q_seq[i]) == toupper(t_seq[j]) ) { 814 score += self_score[i]; 815 continue; 816 } 817 818 819 t= query->score[i][target->seq[j]-'A']; 820 821 if( t >= 0 ) { 822 /* this is a positive score, we are on a high scoring run, so add and move on*/ 823 score += t; 824 } else { 825 /* negative score. i+1,j+1 was our last best score */ 826 for(temp_score = t,ii=i-1,jj=j-1;temp_score >= -score_drop_off && ii >= 0 && jj >= 0 && temp_score < 0;ii--,jj--) { 827 828 temp_score += query->score[ii][target->seq[jj]-'A']; 829 } 830 831 if( temp_score >= 0 ) { 832 /* new maximum reached */ 833 i = ii; 834 j = jj; 835 score += temp_score; 836 continue; /* back to main loop */ 837 } else { 838 /* either temp_score < -drop_off or something else */ 839 break; 840 } 841 } 842 } 843 844 /* set start position */ 845 846 query_start = i+1; 847 target_start = j+1; 848 overall_score += score; 849 850 851 /* downstream */ 852 853 854 for(score=0,i=query_pos,j=target_pos;i < query->len && j < target->len;i++,j++) { 855 856 /* optimise identical matches */ 857 if( toupper(q_seq[i]) == toupper(t_seq[j]) ) { 858 score += self_score[i]; 859 continue; 860 } 861 862 863 t= query->score[i][target->seq[j]-'A']; 864 /* fprintf(stderr,"Doing %d %d, [%c,%c] off %d score %d\n",i,j,query->query->seq[i],target->seq[j],t,score); */ 865 if( t >= 0 ) { 866 /* this is a positive score, we are on a high scoring run, so add and move on*/ 867 score += t; 868 } else { 869 /* negative score. i-1,j-1 was our last best score */ 870 for(temp_score = t,ii=i+1,jj=j+1;temp_score >= -score_drop_off && ii < query->len && jj < target->len && temp_score < 0;ii++,jj++) { 871 temp_score += query->score[ii][target->seq[jj]-'A']; 872 /* fprintf(stderr,"Negative looping %d,%d temp %d [%c,%c]\n",ii,jj,temp_score,query->query->seq[ii],target->seq[jj]); */ 873 874 } 875 876 if( temp_score >= 0 ) { 877 /* new good score reached */ 878 i = ii; 879 j = jj; 880 score += temp_score; 881 continue; /* back to main loop */ 882 } else { 883 /* either temp_score < -drop_off or something else */ 884 break; 885 } 886 } 887 } 888 889 if( overall_score + score < min_score ) { 890 return NULL; 891 } 892 893 /* we have a valid HSP. Ask for memory and return */ 894 895 896 if( cache != NULL ) { 897 out = HSP_alloc_cache(cache); 898 } else { 899 out = HSP_alloc(); 900 } 901 902 out->query_start = query_start; 903 out->target_start = target_start; 904 out->score = overall_score + score; 905 906 out->query = hard_link_Sequence(query->query); 907 out->target= hard_link_Sequence(target); 908 909 /* set length and score */ 910 out->length = i-1 - out->query_start +1; 911 912 913 return out; 914} 915 916 917 918 919%func 920Builds a new QuerySeqHSP from Sequence and Matrix 921%% 922QuerySeqHSP * new_QuerySeqHSP(Sequence * seq,CompMat * mat) 923{ 924 QuerySeqHSP * out; 925 int i; 926 927 assert(seq); 928 assert(mat); 929 930 out = QuerySeqHSP_alloc(); 931 out->query = hard_link_Sequence(seq); 932 out->score = (int **)calloc(seq->len,sizeof(int*)); 933 out->self_score = (int*) calloc(seq->len,sizeof(int)); 934 out->len = seq->len; 935 936 for(i=0;i<seq->len;i++) { 937 out->score[i] = mat->comp[toupper(seq->seq[i])-'A']; 938 out->self_score[i] = mat->comp[toupper(seq->seq[i])-'A'][toupper(seq->seq[i])-'A']; 939 } 940 941 return out; 942} 943