1 /* scorestack.c
2  * SRE, Tue Aug 17 09:50:39 1993
3  *
4  * For unidirectional scanning search procedures, implement
5  * a score reporting system that filters out hits that overlap
6  * with higher-scoring printed scores.
7  *
8  * The simplest rule to use would be to keep a record of the last hit;
9  * on receiving a new hit: 1) if new hit overlaps with last hit and
10  * new hit is higher, replace last with new; 2) if new hit overlaps
11  * with last hit and new hit is lower, ignore new; 3) if new hit
12  * does not overlap with last, report last and assign new to last.
13  * At end, report last. This is essentially the rule used by the
14  * original hmm and cove scanning procedures.
15  *
16  * There is a small weakness in this strategy, in that for three
17  * hits A > B > C which all overlap, only A will be reported; but
18  * although A overlaps B and B overlaps C, A may not overlap C.
19  * (Will this ever happen in reality? I dunno. I don't want to
20  * be surprised.)
21  *
22  * Thus, this more complicated strategy.
23  *    Keep a stack of last hits.
24  *    On receiving a new hit:
25  *      1) if new overlaps last and new >  last, push new onto stack
26  *      2) if new overlaps last and new <= last, ignore new
27  *      3) if new doesn't overlap, resolve stack; start new stack and
28  *         push new onto it.
29  *    At end: resolve stack.
30  *
31  *    Stack resolution:
32  *      set "previously reported hit" to -1,-1 so it won't overlap w/ anything
33  *      while something is in the stack:
34  *         pop top hit off stack
35  *         if it overlaps with previously reported hit, continue;
36  *         if it doesn't overlap, report it
37  *
38  *    Testing overlap:
39  *      Given two subsequences with endpoints al,ar and bl, br,
40  *      with no other knowledge, we would need to test whether any of
41  *      the four endpoints are within the opposing subsequence.
42  *      However, because we're scanning unidirectionally, we know
43  *      that the new right end is greater than the old right end,
44  *      so we only need to test whether the old right end  >= new left
45  *      end.
46  *
47  * External function:
48  *
49  * ReportScanHit()   - report a hit
50  *                     or, if reported coords are -1,-1, resolve old
51  *                     stack, cleanup and exit.
52  *
53  */
54 
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <limits.h>
58 
59 #ifdef MEMDEBUG
60 #include "dbmalloc.h"
61 #endif
62 
63 #include "squid.h"
64 
65 /* Data structure for the stack of previous hits;
66  * declarations of the functions to manipulate it.
67  */
68 struct hitstack_s {
69   int    left;			/* left coord of matched segment  */
70   int    right;			/* right coord of matched segment */
71   double score;			/* score of match                 */
72   struct hitstack_s *nxt;       /* pointer to next elem in stack  */
73 };
74 static struct hitstack_s *init_hitstack(void);
75 static void push_hitstack(struct hitstack_s *hstack,int left,int right, double score);
76 static int  pop_hitstack(struct hitstack_s *hstack, int *ret_left, int *ret_right, double *ret_score);
77 static void free_hitstack(struct hitstack_s *hstack);
78 
79 
80 
81 
82 /* Function: ReportScanHit()
83  *
84  * Purpose:  Caller reports a hit during a search scan, and
85  *           provides a pointer to a function we can call to
86  *           print non-overlapping hits. Caller reports
87  *           -1,-1 for coords to request cleanup and end.
88  *
89  *           Two special cases must be dealt with:
90  *             INIT: If the hit stack hasn't been started yet,
91  *                   we need to initialize it before doing
92  *                   anything else
93  *             END:  If coords are -1,-1, we resolve the stack
94  *                   and cleanup; caller is finished with us
95  *                   for now.
96  *
97  * Args:     left       - left coord of hit segment
98  *           right      - right coord of hit segment
99  *           score      - score of the hit
100  *           print_hit  - pointer to a function to print
101  *                        nonoverlapping hits
102  *
103  * Return:   1 on success, 0 on failure.
104  */
105 int
ReportScanHit(int left,int right,double score,int (* print_hit)(int,int,double))106 ReportScanHit(int      left,
107 	      int      right,
108 	      double   score,
109 	      int    (*print_hit)(int,int,double))
110 {
111   static struct hitstack_s *hstack = NULL;   /* static local handle; set to NULL on 1st entry   */
112   static int oldright = -1;	             /* -1 is guaranteed not to overlap w/ first report */
113   int    oldleft;
114   int    newleft, newright;
115   double newscore;
116 
117   /* Check whether this is first entry;
118    * init hit stack if so.
119    */
120   if (hstack == NULL) hstack = init_hitstack();
121 
122 
123   /* Check whether we have to resolve the old stack:
124    * if caller is reporting it's done (-1,-1 coords),
125    * or if new hit doesn't overlap last stacked hit.
126    */
127   if (left > oldright || (left == -1 && right == -1))
128     {
129       /* Stack resolution.
130        */
131       oldleft = INT_MAX;
132       while (pop_hitstack(hstack, &newleft, &newright, &newscore))
133 	{
134 				/* does this hit not overlap w/ previous printed one? */
135 	  if (newright < oldleft)
136 	    {
137 	      (*print_hit)(newleft, newright, newscore);
138 	      oldleft = newleft;
139 	    }
140 	}
141       free_hitstack(hstack);
142       hstack   = NULL;
143       oldright = -1;
144 
145       /* Start new stack, if not done.
146        */
147       if (left != -1 || right != -1)
148 	{
149 	  hstack = init_hitstack();
150 	  push_hitstack(hstack, left, right, score);
151 	  oldright = right;
152 	}
153     }
154 
155 
156   /* else, they overlap; if new reported score is better than last one,
157    * push new one. We're guaranteed to have something in
158    * the stack, so we can use the score in hstack->nxt->score.
159    * Reset oldright to be the new right edge of the stack, if we add something.
160    */
161   else if (score > hstack->nxt->score)
162     {
163       push_hitstack(hstack, left, right, score);
164       oldright = right;
165     }
166 
167   /* else, they overlap and the newly reported score
168    * isn't better, so we ignore it.
169    */
170   return 1;
171 }
172 
173 
174 
175 
176 
177 
178 
179 
180 /* Functions: init_hitstack()
181  *            push_hitstack()
182  *            pop_hitstack()
183  *            free_hitstack()
184  *
185  * Purpose:   Implementation of the pushdown stack for
186  *            keeping old hit positions and scores.
187  *
188  *            The hitstack has a dummy begin element,
189  *            so the first legitimate element is
190  *            hstack->nxt. The last legitimate element
191  *            has a NULL nxt pointer.
192  */
193 static struct hitstack_s *
init_hitstack(void)194 init_hitstack(void)
195 {
196   struct hitstack_s *hstack;
197 
198   if ((hstack = (struct hitstack_s *) malloc (sizeof(struct hitstack_s))) == NULL)
199     Die("Memory allocation failure at %s line %d", __FILE__, __LINE__);
200   hstack->nxt = NULL;
201   return hstack;
202 }
203 static void
push_hitstack(struct hitstack_s * hstack,int left,int right,double score)204 push_hitstack(struct hitstack_s *hstack,
205 	      int                left,
206 	      int                right,
207 	      double             score)
208 {
209   struct hitstack_s *new;
210 
211   if ((new = (struct hitstack_s *) malloc (sizeof(struct hitstack_s))) == NULL)
212     Die("Memory allocation failure at %s line %d", __FILE__, __LINE__);
213 
214   new->left   = left;
215   new->right  = right;
216   new->score  = score;
217 
218   new->nxt    = hstack->nxt;
219   hstack->nxt = new;
220 }
221 static int
pop_hitstack(struct hitstack_s * hstack,int * ret_left,int * ret_right,double * ret_score)222 pop_hitstack(struct hitstack_s *hstack,
223 	     int               *ret_left,
224 	     int               *ret_right,
225 	     double            *ret_score)
226 {
227   struct hitstack_s *old;
228 
229   if (hstack->nxt == NULL) return 0;
230 
231   old         = hstack->nxt;
232   hstack->nxt = old->nxt;
233 
234   *ret_left  = old->left;
235   *ret_right = old->right;
236   *ret_score = old->score;
237 
238   free(old);
239   return 1;
240 }
241 static void
free_hitstack(struct hitstack_s * hstack)242 free_hitstack(struct hitstack_s *hstack)
243 {
244   int left, right;
245   double score;
246 
247   while (pop_hitstack(hstack, &left, &right, &score) != 0)
248     ; /* do nothing */
249   free(hstack);
250 }
251