1 /* scorestack.c
2 * SRE, Tue Aug 17 09:50:39 1993
3 *
4 * For unidirectional scanning search procedures, implement
5 * a score reporting system that filters out hits that overlap
6 * with higher-scoring printed scores.
7 *
8 * The simplest rule to use would be to keep a record of the last hit;
9 * on receiving a new hit: 1) if new hit overlaps with last hit and
10 * new hit is higher, replace last with new; 2) if new hit overlaps
11 * with last hit and new hit is lower, ignore new; 3) if new hit
12 * does not overlap with last, report last and assign new to last.
13 * At end, report last. This is essentially the rule used by the
14 * original hmm and cove scanning procedures.
15 *
16 * There is a small weakness in this strategy, in that for three
17 * hits A > B > C which all overlap, only A will be reported; but
18 * although A overlaps B and B overlaps C, A may not overlap C.
19 * (Will this ever happen in reality? I dunno. I don't want to
20 * be surprised.)
21 *
22 * Thus, this more complicated strategy.
23 * Keep a stack of last hits.
24 * On receiving a new hit:
25 * 1) if new overlaps last and new > last, push new onto stack
26 * 2) if new overlaps last and new <= last, ignore new
27 * 3) if new doesn't overlap, resolve stack; start new stack and
28 * push new onto it.
29 * At end: resolve stack.
30 *
31 * Stack resolution:
32 * set "previously reported hit" to -1,-1 so it won't overlap w/ anything
33 * while something is in the stack:
34 * pop top hit off stack
35 * if it overlaps with previously reported hit, continue;
36 * if it doesn't overlap, report it
37 *
38 * Testing overlap:
39 * Given two subsequences with endpoints al,ar and bl, br,
40 * with no other knowledge, we would need to test whether any of
41 * the four endpoints are within the opposing subsequence.
42 * However, because we're scanning unidirectionally, we know
43 * that the new right end is greater than the old right end,
44 * so we only need to test whether the old right end >= new left
45 * end.
46 *
47 * External function:
48 *
49 * ReportScanHit() - report a hit
50 * or, if reported coords are -1,-1, resolve old
51 * stack, cleanup and exit.
52 *
53 */
54
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <limits.h>
58
59 #ifdef MEMDEBUG
60 #include "dbmalloc.h"
61 #endif
62
63 #include "squid.h"
64
65 /* Data structure for the stack of previous hits;
66 * declarations of the functions to manipulate it.
67 */
68 struct hitstack_s {
69 int left; /* left coord of matched segment */
70 int right; /* right coord of matched segment */
71 double score; /* score of match */
72 struct hitstack_s *nxt; /* pointer to next elem in stack */
73 };
74 static struct hitstack_s *init_hitstack(void);
75 static void push_hitstack(struct hitstack_s *hstack,int left,int right, double score);
76 static int pop_hitstack(struct hitstack_s *hstack, int *ret_left, int *ret_right, double *ret_score);
77 static void free_hitstack(struct hitstack_s *hstack);
78
79
80
81
82 /* Function: ReportScanHit()
83 *
84 * Purpose: Caller reports a hit during a search scan, and
85 * provides a pointer to a function we can call to
86 * print non-overlapping hits. Caller reports
87 * -1,-1 for coords to request cleanup and end.
88 *
89 * Two special cases must be dealt with:
90 * INIT: If the hit stack hasn't been started yet,
91 * we need to initialize it before doing
92 * anything else
93 * END: If coords are -1,-1, we resolve the stack
94 * and cleanup; caller is finished with us
95 * for now.
96 *
97 * Args: left - left coord of hit segment
98 * right - right coord of hit segment
99 * score - score of the hit
100 * print_hit - pointer to a function to print
101 * nonoverlapping hits
102 *
103 * Return: 1 on success, 0 on failure.
104 */
105 int
ReportScanHit(int left,int right,double score,int (* print_hit)(int,int,double))106 ReportScanHit(int left,
107 int right,
108 double score,
109 int (*print_hit)(int,int,double))
110 {
111 static struct hitstack_s *hstack = NULL; /* static local handle; set to NULL on 1st entry */
112 static int oldright = -1; /* -1 is guaranteed not to overlap w/ first report */
113 int oldleft;
114 int newleft, newright;
115 double newscore;
116
117 /* Check whether this is first entry;
118 * init hit stack if so.
119 */
120 if (hstack == NULL) hstack = init_hitstack();
121
122
123 /* Check whether we have to resolve the old stack:
124 * if caller is reporting it's done (-1,-1 coords),
125 * or if new hit doesn't overlap last stacked hit.
126 */
127 if (left > oldright || (left == -1 && right == -1))
128 {
129 /* Stack resolution.
130 */
131 oldleft = INT_MAX;
132 while (pop_hitstack(hstack, &newleft, &newright, &newscore))
133 {
134 /* does this hit not overlap w/ previous printed one? */
135 if (newright < oldleft)
136 {
137 (*print_hit)(newleft, newright, newscore);
138 oldleft = newleft;
139 }
140 }
141 free_hitstack(hstack);
142 hstack = NULL;
143 oldright = -1;
144
145 /* Start new stack, if not done.
146 */
147 if (left != -1 || right != -1)
148 {
149 hstack = init_hitstack();
150 push_hitstack(hstack, left, right, score);
151 oldright = right;
152 }
153 }
154
155
156 /* else, they overlap; if new reported score is better than last one,
157 * push new one. We're guaranteed to have something in
158 * the stack, so we can use the score in hstack->nxt->score.
159 * Reset oldright to be the new right edge of the stack, if we add something.
160 */
161 else if (score > hstack->nxt->score)
162 {
163 push_hitstack(hstack, left, right, score);
164 oldright = right;
165 }
166
167 /* else, they overlap and the newly reported score
168 * isn't better, so we ignore it.
169 */
170 return 1;
171 }
172
173
174
175
176
177
178
179
180 /* Functions: init_hitstack()
181 * push_hitstack()
182 * pop_hitstack()
183 * free_hitstack()
184 *
185 * Purpose: Implementation of the pushdown stack for
186 * keeping old hit positions and scores.
187 *
188 * The hitstack has a dummy begin element,
189 * so the first legitimate element is
190 * hstack->nxt. The last legitimate element
191 * has a NULL nxt pointer.
192 */
193 static struct hitstack_s *
init_hitstack(void)194 init_hitstack(void)
195 {
196 struct hitstack_s *hstack;
197
198 if ((hstack = (struct hitstack_s *) malloc (sizeof(struct hitstack_s))) == NULL)
199 Die("Memory allocation failure at %s line %d", __FILE__, __LINE__);
200 hstack->nxt = NULL;
201 return hstack;
202 }
203 static void
push_hitstack(struct hitstack_s * hstack,int left,int right,double score)204 push_hitstack(struct hitstack_s *hstack,
205 int left,
206 int right,
207 double score)
208 {
209 struct hitstack_s *new;
210
211 if ((new = (struct hitstack_s *) malloc (sizeof(struct hitstack_s))) == NULL)
212 Die("Memory allocation failure at %s line %d", __FILE__, __LINE__);
213
214 new->left = left;
215 new->right = right;
216 new->score = score;
217
218 new->nxt = hstack->nxt;
219 hstack->nxt = new;
220 }
221 static int
pop_hitstack(struct hitstack_s * hstack,int * ret_left,int * ret_right,double * ret_score)222 pop_hitstack(struct hitstack_s *hstack,
223 int *ret_left,
224 int *ret_right,
225 double *ret_score)
226 {
227 struct hitstack_s *old;
228
229 if (hstack->nxt == NULL) return 0;
230
231 old = hstack->nxt;
232 hstack->nxt = old->nxt;
233
234 *ret_left = old->left;
235 *ret_right = old->right;
236 *ret_score = old->score;
237
238 free(old);
239 return 1;
240 }
241 static void
free_hitstack(struct hitstack_s * hstack)242 free_hitstack(struct hitstack_s *hstack)
243 {
244 int left, right;
245 double score;
246
247 while (pop_hitstack(hstack, &left, &right, &score) != 0)
248 ; /* do nothing */
249 free(hstack);
250 }
251