1
2%{
3
4#include "sequence.h"
5#include "compmat.h"
6#include "glib.h"
7#include "hsp.h"
8
9#define UNFEASIBLY_LARGE_SCORE 1000000
10
11
12
13%}
14
15
16struct TopScoreManager
17int * score
18int length
19int current_pos
20int worst_score
21int worst_position
22
23struct QuerySeqHSP
24Sequence * query
25int ** score !link
26int * self_score
27int len
28
29struct HSPmanager
30GHashTable * target_hash !link
31Sequence * query
32CompMat * mat
33int drop_off
34int min_score !def=40
35TopScoreManager * tsm;
36QuerySeqHSP * qs
37HSPCache * cache
38int hsp_count
39
40
41struct LineariseHSPPara
42int max_size
43int min_score
44int width
45int tail
46int verbosity
47
48
49
50%{
51#include "hsphandler.h"
52
53
54%func
55Makes a new topscore manager of a specific length
56%%
57TopScoreManager * new_TopScoreManager(int length)
58{
59
60
61  TopScoreManager * tsm;
62
63  tsm = TopScoreManager_alloc();
64  tsm->score = calloc(length,sizeof(int));
65  tsm->current_pos = 0;
66  tsm->length = length;
67
68  tsm->worst_score = 0;
69  tsm->worst_position = -1;
70
71  return tsm;
72}
73
74
75%func
76Adds a top score
77%%
78boolean add_score_TopScoreManager(TopScoreManager * tsm,int score)
79{
80  int i;
81  int temp_score;
82  int temp_pos;
83
84/*  fprintf(stderr,"Looking at %d\n",score);*/
85
86  if( tsm->current_pos < tsm->length ) {
87    tsm->score[tsm->current_pos++] = score;
88    return TRUE;
89  }
90
91
92  if( tsm->worst_position == -1 ) {
93    /* fprintf(stderr,"Recalculating...\n"); */
94    /* need to recalculate top score */
95    for(i=1,temp_score = tsm->score[0],temp_pos = 0;i<tsm->length;i++) {
96      if( temp_score > tsm->score[i] ) {
97	temp_score = tsm->score[i];
98	temp_pos = i;
99      }
100    }
101    tsm->worst_score = temp_score;
102    tsm->worst_position = temp_pos;
103  }
104
105  if( score < tsm->worst_score ) {
106/*    fprintf(stderr,"Ignoring... %d vs %d\n",score,tsm->worst_score); */
107    return FALSE;
108  }
109
110/*  fprintf(stderr,"Resetting...\n"); */
111  tsm->score[tsm->worst_position] = score;
112  tsm->worst_position = -1;
113
114  return TRUE;
115}
116
117%func
118A simpler truncation method, using diagonals
119%%
120LinearHSPmanager * truncated_simple_LinearHSPmanager(LinearHSPmanager * lm,LineariseHSPPara * para)
121{
122  LinearHSPmanager * out;
123  int i;
124  int j;
125  int new_score;
126
127  int diag_one;
128  int diag_two;
129
130  int query_position[500];
131  int k;
132  int aa_count;
133  int query_offset;
134/*
135  struct timeval t1;
136  struct timeval t2;
137  struct timeval t3;
138*/
139
140/*  gettimeofday(&t1,NULL);*/
141
142  if( VERBOSITY_CHECK(2,para->verbosity) ) {
143    info("Input linear management of %d entries\n",lm->len);
144  }
145
146
147  for(i=0;i<lm->len;i++) {
148    /* rescore this set wrt to top diagonal */
149    sort_HSPset_by_score(lm->set[i]);
150    new_score = lm->set[i]->hsp[0]->score;
151    diag_one = lm->set[i]->hsp[0]->query_start - lm->set[i]->hsp[0]->target_start;
152
153    if( VERBOSITY_CHECK(5,para->verbosity) ) {
154      info("Looking at %s with starting hsp score of %d\n",lm->set[i]->hsp[0]->target->name,new_score);
155    }
156
157    query_offset = lm->set[i]->hsp[0]->query_start - 200;
158
159    for(k=0;k<500;k++) {
160      query_position[k] = 0;
161    }
162    for(k=0;k<lm->set[i]->hsp[0]->length && lm->set[i]->hsp[0]->query_start - query_offset+k < 500;k++) {
163      query_position[lm->set[i]->hsp[0]->query_start - query_offset+k] = 1;
164    }
165
166
167    for(j=1;j<lm->set[i]->len;j++) {
168
169      if( lm->set[i]->hsp[j]->score < para->min_score ) {
170	if( VERBOSITY_CHECK(8,para->verbosity) ) {
171	  info("  ...not accepting hsp %d,%d due to min score",lm->set[i]->hsp[j]->query_start,lm->set[i]->hsp[j]->target_start);
172	}
173
174	break;
175      }
176
177      diag_two = lm->set[i]->hsp[j]->query_start - lm->set[i]->hsp[j]->target_start;
178      if( abs(diag_one - diag_two) < para->width && abs(lm->set[i]->hsp[0]->query_start - lm->set[i]->hsp[j]->query_start) < para->tail) {
179
180	/* this is now in the right area                     */
181	/* but we need to test for overlap on existing cases */
182
183
184	/*	fprintf(stderr,"  .... looking at hsp overlap\n");*/
185
186	for(aa_count = 0,k=0;k<lm->set[i]->hsp[j]->length && lm->set[i]->hsp[j]->query_start - query_offset+k < 500;k++) {
187	  if( query_position[lm->set[i]->hsp[j]->query_start - query_offset+k] == 1 ) {
188	    aa_count++;
189	  }
190	}
191
192
193	/* if we have more than 15% of this hsp "accounted for" move on */
194
195	if(aa_count/(double)lm->set[i]->hsp[j]->length > 0.15 ) {
196	  continue;
197	}
198
199	/* fprintf(stderr,"   .... updating HSP overlap\n");*/
200
201	/* now set these positions as used */
202
203	for(aa_count = 0,k=0;k<lm->set[i]->hsp[j]->length && lm->set[i]->hsp[j]->query_start - query_offset+k < 500;k++) {
204	  query_position[lm->set[i]->hsp[j]->query_start - query_offset+k] = 1;
205	}
206
207	new_score += lm->set[i]->hsp[j]->score;
208	if( VERBOSITY_CHECK(5,para->verbosity) ) {
209	  info("  ..accepting hsp on %s %d,%d new score %d",lm->set[i]->hsp[j]->target->name,lm->set[i]->hsp[j]->query_start,lm->set[i]->hsp[j]->target_start,new_score);
210	}
211      } else {
212	if( VERBOSITY_CHECK(5,para->verbosity) ) {
213	  info("  ...not accepting hsp %d,%d due to diagonal width",lm->set[i]->hsp[j]->query_start,lm->set[i]->hsp[j]->target_start);
214
215	}
216
217      }
218    }
219
220    if( VERBOSITY_CHECK(3,para->verbosity) ) {
221      info("Looking at %s with final score of %d",lm->set[i]->hsp[0]->target->name,new_score);
222    }
223
224    lm->set[i]->score = new_score;
225  }
226
227  /*  gettimeofday(&t2,NULL); */
228
229  if( VERBOSITY_CHECK(2,para->verbosity) ) {
230    info("sorting %d items",lm->len);
231  }
232
233  /*qsort(lm->set,lm->len,sizeof(HSPset*),compare_HSPset_score_qsort);*/
234
235  sort_LinearHSPmanager(lm,compare_HSPset_score);
236
237
238  /*  gettimeofday(&t3,NULL);
239
240  info("truncation clock point: rescoring %f : sorting %f",
241	  t2.tv_sec - t1.tv_sec + ((t2.tv_usec - t1.tv_usec) * 1e-6),
242	  t3.tv_sec - t2.tv_sec + ((t2.tv_usec - t2.tv_usec) * 1e-6)
243	  );
244
245  */
246
247  out = LinearHSPmanager_alloc_std();
248  out->mat = hard_link_CompMat(lm->mat);
249
250
251    for(i=0;i<lm->len;i++) {
252      if( VERBOSITY_CHECK(5,para->verbosity) ) {
253	info("Accepting hit %s position %d with score %d",lm->set[i]->hsp[0]->target->name,i,lm->set[i]->score);
254      }
255
256      add_LinearHSPmanager(out,hard_link_HSPset(lm->set[i]));
257      if( i > para->max_size ) {
258	break;
259      }
260    }
261
262
263  return out;
264}
265
266
267
268%func
269Makes a truncated linear set
270%%
271LinearHSPmanager * truncated_LinearHSPmanager(LinearHSPmanager * lm,int max_size,int min_score,int width,int tail)
272{
273
274  LinearHSPmanager * out;
275  int i,j;
276  int worst_score = UNFEASIBLY_LARGE_SCORE;
277  int worst_j = -1;
278  HSPset ** worst;
279  HSPset * trial;
280
281  assert(max_size > 0);
282
283  out = LinearHSPmanager_alloc_len(max_size+1);
284  out->mat = hard_link_CompMat(lm->mat);
285
286  sort_LinearHSPmanager(lm,compare_HSPset_score);
287
288  for(i=0;i<lm->len;i++) {
289    /* we chew up into max_size positions */
290    if( out->len < max_size ) {
291      trial = new_consistent_HSPset(lm->set[i],min_score,width,tail);
292      if( trial->len == 0 ) {
293	continue;
294      }
295
296      add_LinearHSPmanager(out,trial);
297    } else {
298          /* if have not found worst, scan to find worst */
299      if( worst_score == UNFEASIBLY_LARGE_SCORE ) {
300	for(j=0;j<out->len;j++) {
301	  if( out->set[j]->score < worst_score ) {
302	    worst_score = out->set[j]->score;
303	    worst = out->set+j;
304	    worst_j = j;
305	  }
306	}
307      }
308      /* worst_score is now the score */
309      if( lm->set[i]->score < worst_score ) {
310	break; /* otta here - we don't need to look at any more! */
311      }
312      trial = new_consistent_HSPset(lm->set[i],min_score,width,tail);
313      if( trial->len == 0 ) {
314	continue;
315      }
316
317
318      if( trial->score > worst_score ) {
319	free_HSPset(out->set[worst_j]);
320	out->set[worst_j] = trial;
321	worst_score = UNFEASIBLY_LARGE_SCORE;
322      } else {
323	free_HSPset(trial);
324      }
325    }
326  }
327
328  return out;
329
330}
331
332%func
333Makes an HSP set via heuristics
334to deal with low complexity regions
335%%
336HSPset * new_consistent_HSPset(HSPset * set,int min_score,int width,int tail)
337{
338  HSPset * out;
339  int i,j;
340  int diag_a;
341  int diag_b;
342  int eaten;
343
344  /*  fprintf(stderr,"Entering consistency for %s with %d\n",set->hsp[0]->target->name,set->len); */
345  out = HSPset_alloc_std();
346
347  sort_HSPset_by_score(set);
348
349
350  add_HSPset(out,hard_link_HSP(set->hsp[0]));
351  out->score = set->hsp[0]->score;
352
353  for(i=1;i<set->len;i++) {
354    /* check against exisiting HSPs. If fits, add into big set */
355    eaten = 0;
356
357    if( set->hsp[i]->score < min_score ) {
358      continue;
359    }
360
361    for(j=0;j<out->len;j++) {
362      diag_a = set->hsp[i]->query_start - set->hsp[i]->target_start;
363      diag_b = out->hsp[j]->query_start - out->hsp[j]->target_start;
364      if( abs(diag_a - diag_b) > 2 * width ) {
365	continue; /* does not match */
366      }
367
368      eaten = 1;
369      add_HSPset(out,hard_link_HSP(set->hsp[i]));
370      out->score += set->hsp[i]->score;
371      break;
372    }
373  }
374
375
376
377  return out;
378
379}
380
381%func
382New, simpler LinearHSPmanager with diagonal heuristics
383%%
384LinearHSPmanager * new_LinearHSPmanager_simple_heuristic(HSPmanager * hspm,LineariseHSPPara * para)
385{
386  int i;
387  int j;
388  LinearHSPmanager * temp;
389  LinearHSPmanager * out;
390
391#ifdef LINUX_TIMER
392  struct timeval t1;
393  struct timeval t2;
394  struct timeval t3;
395
396  if( VERBOSITY_CHECK(1,para->verbosity) ) {
397    gettimeofday(&t1,NULL);
398  }
399
400#endif
401
402  temp = new_LinearHSPmanager_truncate_on_score(hspm);
403
404  if( VERBOSITY_CHECK(2,para->verbosity) ) {
405    info("Have got linear hsp manager with %d entries",temp->len);
406  }
407
408
409#ifdef LINUX_TIMER
410  if( VERBOSITY_CHECK(1,para->verbosity) ) {
411    gettimeofday(&t2,NULL);
412  }
413#endif
414
415
416  out = truncated_simple_LinearHSPmanager(temp,para);
417
418  if( VERBOSITY_CHECK(2,para->verbosity) ) {
419    info("Now got linear hsp manager with %d entries",out->len);
420  }
421
422#ifdef LINUX_TIMER
423
424
425  if( VERBOSITY_CHECK(1,para->verbosity) ) {
426    gettimeofday(&t3,NULL);
427    info("Sort breakdown: Conversion %f : Truncation %f",
428	 t2.tv_sec - t1.tv_sec + ((t2.tv_usec - t1.tv_usec) * 1e-6),
429	 t3.tv_sec - t2.tv_sec + ((t2.tv_usec - t2.tv_usec) * 1e-6)
430	 );
431  }
432
433#endif
434
435  /*
436  for(i=0;i<out->len;i++) {
437        fprintf(stderr,"%d, got score %d, top score %d %s\n",i,out->set[i]->score,out->set[i]->hsp[0]->score,out->set[i]->hsp[0]->target->name);
438    for(j=0;j<out->set[i]->len;j++) {
439       fprintf(stderr,"   HSP %d,%d score %d\n",out->set[i]->hsp[j]->query_start,
440	      out->set[i]->hsp[j]->target_start,
441	      out->set[i]->hsp[j]->score);
442
443
444    }
445  }
446
447  */
448
449  info("Currently not free'ing temporary list");
450
451  return out;
452}
453
454
455%func
456Builds a new LinearHSPmanager from hash based with heuristics
457%%
458LinearHSPmanager * new_LinearHSPmanager_heuristic_max(HSPmanager * hspm,int max_size)
459{
460  LinearHSPmanager * temp;
461  LinearHSPmanager * out;
462
463  assert(hspm);
464  assert(max_size > 0);
465
466  temp = new_LinearHSPmanager_truncate_on_score(hspm);
467
468  out = truncated_LinearHSPmanager(temp,max_size,30,30,40);
469
470  /*  free_LinearHSPmanager(temp); */
471
472  return out;
473}
474
475%func
476Builds a LinearHSPmanager from a hash based HSP manager, using worst score truncation
477%%
478LinearHSPmanager * new_LinearHSPmanager_truncate_on_score(HSPmanager * hspm)
479{
480  LinearHSPmanager * out;
481
482
483  out = LinearHSPmanager_alloc_std();
484  out->mat = hard_link_CompMat(hspm->mat);
485
486  out->worst_hsp_score = hspm->tsm->worst_score;
487
488  /*  for(i=0;i<hspm->tsm->length;i++) {
489    fprintf(stderr,"At position %d got score %d\n",i,hspm->tsm->score[i]);
490  }
491  */
492
493
494
495  /*  fprintf(stderr,"Before management, we have %d, worst score is %d\n",g_hash_table_size(hspm->target_hash),out->worst_hsp_score); */
496
497
498
499
500  g_hash_table_foreach(hspm->target_hash,linearise_HSPset_truncate_on_score,out);
501
502/*  sort_LinearHSPmanager(out,compare_HSPset_score); */
503
504  return out;
505
506}
507
508
509%func
510Builds a LinearHSPmanager from a hash based HSP manager
511%%
512LinearHSPmanager * new_LinearHSPmanager_flat(HSPmanager * hspm)
513{
514  LinearHSPmanager * out;
515
516  out = LinearHSPmanager_alloc_std();
517  out->mat = hard_link_CompMat(hspm->mat);
518
519
520  g_hash_table_foreach(hspm->target_hash,linearise_HSPset_flat,out);
521
522  sort_LinearHSPmanager(out,compare_HSPset_score);
523
524  return out;
525
526}
527
528%func
529internal function for remapping HSPs
530%%
531void linearise_HSPset_truncate_on_score(gpointer key,gpointer value,gpointer user_data)
532{
533  LinearHSPmanager * l = (LinearHSPmanager *) user_data;
534  HSPset * s = (HSPset *) value;
535
536  if( s->len == 1 && s->score < l->worst_hsp_score ) {
537    return;
538  } else {
539    add_LinearHSPmanager(l,hard_link_HSPset(s));
540  }
541}
542
543
544%func
545internal function for remapping HSPs with score cutoff
546%%
547void linearise_HSPset_flat(gpointer key,gpointer value,gpointer user_data)
548{
549  LinearHSPmanager * l = (LinearHSPmanager *) user_data;
550  HSPset * s = (HSPset *) value;
551  int i;
552
553  s->score = 0;
554  s->best_score =0;
555
556
557  for(i=0;i<s->len;i++) {
558    s->score += s->hsp[i]->score;
559    if( s->hsp[i]->score > s->best_score ) {
560      s->best_score = s->hsp[i]->score;
561    }
562  }
563
564  add_LinearHSPmanager(l,hard_link_HSPset(s));
565
566}
567
568%func
569internal function for remapping HSPs
570%%
571void linearise_HSPset_consistent(gpointer key,gpointer value,gpointer user_data)
572{
573  LinearHSPmanager * l = (LinearHSPmanager *) user_data;
574  HSPset * s = (HSPset *) value;
575  HSPset * add;
576
577  add = new_consistent_HSPset(s,l->min_score,l->width,l->tail);
578  if( add->len > 0 ) {
579    add_LinearHSPmanager(l,add);
580  } else {
581    free_HSPset(add);
582  }
583}
584
585
586
587%func
588Builds a new HSPmanager for a target system
589%%
590HSPmanager * new_HSPmanager(Sequence * query,CompMat * mat,int score_drop_off)
591{
592  HSPmanager * out;
593
594  out = HSPmanager_alloc();
595
596  out->query = hard_link_Sequence(query);
597  if( mat == NULL ) {
598    out->mat = NULL;
599  } else {
600    out->mat   = hard_link_CompMat(mat);
601  }
602
603
604  out->qs = new_QuerySeqHSP(query,mat);
605  /* out->qs = NULL; */
606
607  out->drop_off = score_drop_off;
608  out->target_hash =  g_hash_table_new(g_direct_hash,g_direct_equal);
609  out->tsm = new_TopScoreManager(250);
610  /*  out->cache = new_HSPCache(2000);*/
611  out->cache = NULL;
612  out->min_score = 25;
613  out->hsp_count = 0;
614
615  return out;
616}
617
618%func
619adds a new target pair, irregardless of score
620%%
621int add_pair_HSPmanager(HSPmanager * hspm,Sequence * target,int query_pos,int target_pos)
622{
623  return add_pair_HSPmanager_score(hspm,target,query_pos,target_pos,-1);
624}
625
626%func
627Adds a new target pair to this HSPmanager for indexing, with a min score
628%%
629int add_pair_HSPmanager_score(HSPmanager * hspm,Sequence * target,int query_pos,int target_pos,int min_score)
630{
631  HSPset * set;
632  int i;
633  HSP * hsp;
634  boolean has_set = 0;
635
636  /* see if this target is loaded into the manager */
637
638  if( (set = g_hash_table_lookup(hspm->target_hash,(gpointer)target)) != NULL ) {
639    has_set = 1;
640  }
641
642  /* set is now the HSPset. Ensure this position is not already accounted for
643     in the set */
644
645  if( has_set == 1 ) {
646    if( set->last_accessed != -1 && ON_HSP_MACRO(set->hsp[set->last_accessed],query_pos,target_pos) ) {
647      return set->hsp[set->last_accessed]->score;
648    }
649
650    for(i=0;i<set->len;i++) {
651      if( ON_HSP_MACRO(set->hsp[i],query_pos,target_pos) == TRUE ) {
652	set->last_accessed = i;
653	return set->hsp[set->last_accessed]->score;
654      }
655    }
656  }
657
658  if( hspm->qs != NULL ) {
659    hsp = new_HSP_QuerySeqHSP(hspm->cache,hspm->qs,target,query_pos,target_pos,hspm->mat,hspm->drop_off,min_score);
660  } else {
661    hsp = new_HSP(hspm->cache,hspm->query,target,query_pos,target_pos,hspm->mat,hspm->drop_off);
662  }
663
664  if( hsp == NULL ) {
665    /* internal min score optimisation */
666    return 0;
667  }
668
669
670  if( hsp->score < hspm->min_score ) {
671    /* fprintf(stderr,"hsp being lost due to min score %d\n",hsp->score); */
672    free_HSP(hsp);
673    return 0;
674  }
675
676  hspm->hsp_count++;
677
678  if( has_set == 0 )  {
679    set = HSPset_alloc_std();
680    g_hash_table_insert(hspm->target_hash,(gpointer)target,set);
681  }
682
683
684  set->score += hsp->score;
685  if( hsp->score > set->best_score ) {
686    set->best_score = hsp->score;
687    add_score_TopScoreManager(hspm->tsm,set->best_score);
688  }
689
690
691  add_HSPset(set,hsp);
692  set->last_accessed = set->len-1;
693
694  return hsp->score;
695}
696
697%func
698Adds a new HSP when all info is known
699%%
700boolean add_new_HSP_HSPmanager(HSPmanager * hspm,Sequence * target,int query_start,int target_start,int length,int score)
701{
702  HSPset * set;
703  HSP * hsp;
704
705  /* see if this target is loaded into the manager */
706
707  if( (set = g_hash_table_lookup(hspm->target_hash,(gpointer)target)) == NULL ) {
708    set = HSPset_alloc_std();
709    g_hash_table_insert(hspm->target_hash,(gpointer)target,set);
710  }
711
712
713  if( hspm->cache != NULL ) {
714    hsp = HSP_alloc_cache(hspm->cache);
715  } else {
716    hsp = HSP_alloc();
717  }
718
719  hsp->query = hard_link_Sequence(hspm->query);
720  hsp->target= hard_link_Sequence(target);
721  hsp->score = score;
722  hsp->target_start = target_start;
723  hsp->query_start = query_start;
724  hsp->length = length;
725
726
727  add_HSPset(set,hsp);
728
729  return TRUE;
730}
731
732%func
733Frees the HSPsets
734%%
735void free_ghash_HSPsets(gpointer key,gpointer value,gpointer user_data)
736{
737  int i;
738  HSPset * val = (HSPset *) value;
739  HSPCache * cache = (HSPCache*) user_data;
740
741  if( val->dynamite_hard_link > 1 ) {
742    val->dynamite_hard_link--;
743    return;
744  }
745
746  if( cache != NULL ) {
747    for(i=0;i<val->len;i++) {
748      free_HSP_cache(cache,val->hsp[i]);
749    }
750    val->len = 0;
751  }
752
753  free_HSPset(val);
754}
755
756%func
757Frees the HSPmanager
758%%
759!deconstructor
760HSPmanager * free_HSPmanager(HSPmanager * h)
761{
762  g_hash_table_foreach(h->target_hash,free_ghash_HSPsets,h->cache);
763  g_hash_table_destroy(h->target_hash);
764  free_CompMat(h->mat);
765  if( h->tsm != NULL ) {
766    free_TopScoreManager(h->tsm);
767  }
768  if( h->qs != NULL ) {
769    free_QuerySeqHSP(h->qs);
770  }
771
772  ckfree(h);
773  return NULL;
774}
775
776
777%func
778builds a new HSP for these sequences
779%%
780HSP * new_HSP_QuerySeqHSP(HSPCache * cache,QuerySeqHSP * query,Sequence * target,int query_pos,int target_pos,CompMat * mat,int score_drop_off,int min_score)
781{
782  int i,j;
783  int ii,jj;
784  int temp_score;
785  int t;
786  int pause_i,pause_j;
787  int score = 0;
788  HSP * out = NULL;
789  const char * q_seq;
790  const char * t_seq;
791  const int * self_score;
792
793  int overall_score;
794  int query_start;
795  int target_start;
796
797
798  /* we count the start position twice */
799  overall_score = -(query->score[query_pos][target->seq[target_pos]-'A']);
800
801  q_seq = query->query->seq;
802  t_seq = target->seq;
803  self_score = query->self_score;
804
805  pause_i = i = query_pos;
806  pause_j = j = target_pos;
807
808  /* go upstream first */
809
810  for(score=0;i >= 0 && j >= 0;i--,j--) {
811
812    /* optimise identical matches */
813    if( toupper(q_seq[i]) == toupper(t_seq[j]) ) {
814      score += self_score[i];
815      continue;
816    }
817
818
819    t= query->score[i][target->seq[j]-'A'];
820
821    if( t >= 0 ) {
822      /* this is a positive score, we are on a high scoring run, so add and move on*/
823      score += t;
824    } else {
825      /* negative score. i+1,j+1 was our last best score */
826      for(temp_score = t,ii=i-1,jj=j-1;temp_score >= -score_drop_off && ii >= 0 && jj >= 0 && temp_score < 0;ii--,jj--) {
827
828	temp_score += query->score[ii][target->seq[jj]-'A'];
829      }
830
831      if( temp_score >= 0 ) {
832	/* new maximum reached */
833	i = ii;
834	j = jj;
835	score += temp_score;
836	continue; /* back to main loop */
837      } else {
838	/* either temp_score < -drop_off or something else */
839	break;
840      }
841    }
842  }
843
844  /* set start position */
845
846  query_start = i+1;
847  target_start = j+1;
848  overall_score += score;
849
850
851  /* downstream */
852
853
854  for(score=0,i=query_pos,j=target_pos;i < query->len && j < target->len;i++,j++) {
855
856    /* optimise identical matches */
857    if( toupper(q_seq[i]) == toupper(t_seq[j]) ) {
858      score += self_score[i];
859      continue;
860    }
861
862
863    t= query->score[i][target->seq[j]-'A'];
864    /*    fprintf(stderr,"Doing %d %d, [%c,%c] off %d score %d\n",i,j,query->query->seq[i],target->seq[j],t,score); */
865    if( t >= 0 ) {
866      /* this is a positive score, we are on a high scoring run, so add and move on*/
867      score += t;
868    } else {
869      /* negative score. i-1,j-1 was our last best score */
870      for(temp_score = t,ii=i+1,jj=j+1;temp_score >= -score_drop_off && ii < query->len && jj < target->len && temp_score < 0;ii++,jj++) {
871	temp_score += query->score[ii][target->seq[jj]-'A'];
872	/*	fprintf(stderr,"Negative looping %d,%d temp %d [%c,%c]\n",ii,jj,temp_score,query->query->seq[ii],target->seq[jj]); */
873
874      }
875
876      if( temp_score >= 0 ) {
877	/* new good score reached */
878	i = ii;
879	j = jj;
880	score += temp_score;
881	continue; /* back to main loop */
882      } else {
883	/* either temp_score < -drop_off or something else */
884	break;
885      }
886    }
887  }
888
889  if( overall_score + score < min_score ) {
890    return NULL;
891  }
892
893  /* we have a valid HSP. Ask for memory and return */
894
895
896  if( cache != NULL ) {
897    out = HSP_alloc_cache(cache);
898  } else {
899    out = HSP_alloc();
900  }
901
902  out->query_start = query_start;
903  out->target_start = target_start;
904  out->score = overall_score + score;
905
906  out->query = hard_link_Sequence(query->query);
907  out->target= hard_link_Sequence(target);
908
909  /* set length and score */
910  out->length = i-1 - out->query_start +1;
911
912
913  return out;
914}
915
916
917
918
919%func
920Builds a new QuerySeqHSP from Sequence and Matrix
921%%
922QuerySeqHSP * new_QuerySeqHSP(Sequence * seq,CompMat * mat)
923{
924  QuerySeqHSP * out;
925  int i;
926
927  assert(seq);
928  assert(mat);
929
930  out = QuerySeqHSP_alloc();
931  out->query = hard_link_Sequence(seq);
932  out->score = (int **)calloc(seq->len,sizeof(int*));
933  out->self_score = (int*) calloc(seq->len,sizeof(int));
934  out->len = seq->len;
935
936  for(i=0;i<seq->len;i++) {
937    out->score[i] = mat->comp[toupper(seq->seq[i])-'A'];
938    out->self_score[i] = mat->comp[toupper(seq->seq[i])-'A'][toupper(seq->seq[i])-'A'];
939  }
940
941  return out;
942}
943