1 /*
2 Copyright (c) 2003 by Stefan Kurtz and The Institute for
3 Genomic Research. This is OSI Certified Open Source Software.
4 Please see the file LICENSE for licensing information and
5 the file ACKNOWLEDGEMENTS for names of contributors to the
6 code base.
7 */
8
9 #include "debugdef.h"
10 #include "streedef.h"
11 #include "streeacc.h"
12 #include "protodef.h"
13
lcp(SYMBOL * start1,SYMBOL * end1,SYMBOL * start2,SYMBOL * end2)14 static Uint lcp(SYMBOL *start1,SYMBOL *end1,SYMBOL *start2,SYMBOL *end2)
15 {
16 register SYMBOL *ptr1 = start1,
17 *ptr2 = start2;
18
19 while(ptr1 <= end1 &&
20 ptr2 <= end2 &&
21 *ptr1 == *ptr2)
22 {
23 ptr1++;
24 ptr2++;
25 }
26 return (Uint) (ptr1-start1);
27 }
28
scanprefixfromnodestree(Suffixtree * stree,Location * loc,Bref btptr,SYMBOL * left,SYMBOL * right,Uint rescanlength)29 /*@null@*/ SYMBOL *scanprefixfromnodestree(Suffixtree *stree,Location *loc,
30 Bref btptr,SYMBOL *left,
31 SYMBOL *right,Uint rescanlength)
32 {
33 Uint *nodeptr = NULL, *largeptr = NULL, leafindex, nodedepth,
34 node, distance = 0, prefixlen, headposition, tmpnodedepth,
35 edgelen, remainingtoskip;
36 SYMBOL *lptr, *leftborder = (SYMBOL *) NULL, firstchar, edgechar = 0;
37
38 DEBUG1(4,"scanprefixfromnodestree starts at node %lu\n",
39 (Showuint) BRADDR2NUM(stree,btptr));
40 lptr = left;
41 nodeptr = btptr;
42 if(nodeptr == stree->branchtab)
43 {
44 nodedepth = 0;
45 headposition = 0;
46 } else
47 {
48 GETBOTH(nodedepth,headposition,nodeptr);
49 }
50 loc->nextnode.toleaf = False;
51 loc->nextnode.address = nodeptr;
52 loc->locstring.start = headposition;
53 loc->locstring.length = nodedepth;
54 loc->remain = 0;
55 if(rescanlength <= nodedepth)
56 {
57 remainingtoskip = 0;
58 } else
59 {
60 remainingtoskip = rescanlength - nodedepth;
61 }
62 while(True)
63 {
64 if(lptr > right) // check for empty word
65 {
66 return NULL;
67 }
68 firstchar = *lptr;
69 if(nodeptr == stree->branchtab) // at the root
70 {
71 if((node = stree->rootchildren[(Uint) firstchar]) == UNDEFINEDREFERENCE)
72 {
73 return lptr;
74 }
75 if(ISLEAF(node))
76 {
77 leafindex = GETLEAFINDEX(node);
78 loc->firstptr = stree->text + leafindex;
79 if(remainingtoskip > 0)
80 {
81 prefixlen = remainingtoskip +
82 lcp(lptr+remainingtoskip,right,
83 loc->firstptr+remainingtoskip,stree->sentinel-1);
84 } else
85 {
86 prefixlen = 1 + lcp(lptr+1,right,
87 loc->firstptr+1,stree->sentinel-1);
88 }
89 loc->previousnode = stree->branchtab;
90 loc->edgelen = stree->textlen - leafindex + 1;
91 loc->remain = loc->edgelen - prefixlen;
92 loc->nextnode.toleaf = True;
93 loc->nextnode.address = stree->leaftab + leafindex;
94 loc->locstring.start = leafindex;
95 loc->locstring.length = prefixlen;
96 if(prefixlen == (Uint) (right - lptr + 1))
97 {
98 return NULL;
99 }
100 return lptr + prefixlen;
101 }
102 nodeptr = stree->branchtab + GETBRANCHINDEX(node);
103 GETONLYHEADPOS(headposition,nodeptr);
104 leftborder = stree->text + headposition;
105 } else
106 {
107 node = GETCHILD(nodeptr);
108 while(True)
109 {
110 if(NILPTR(node))
111 {
112 return lptr;
113 }
114 if(ISLEAF(node))
115 {
116 leafindex = GETLEAFINDEX(node);
117 leftborder = stree->text + (nodedepth + leafindex);
118 if(leftborder == stree->sentinel)
119 {
120 return lptr;
121 }
122 edgechar = *leftborder;
123 if(edgechar > firstchar)
124 {
125 return lptr;
126 }
127 if(edgechar == firstchar)
128 {
129 if(remainingtoskip > 0)
130 {
131 prefixlen = remainingtoskip +
132 lcp(lptr+remainingtoskip,right,
133 leftborder+remainingtoskip,stree->sentinel-1);
134 } else
135 {
136 prefixlen = 1 + lcp(lptr+1,right,
137 leftborder+1,stree->sentinel-1);
138 }
139 loc->firstptr = leftborder;
140 loc->previousnode = loc->nextnode.address;
141 loc->edgelen = stree->textlen - (nodedepth + leafindex) + 1;
142 loc->remain = loc->edgelen - prefixlen;
143 loc->nextnode.toleaf = True;
144 loc->nextnode.address = stree->leaftab + leafindex;
145 loc->locstring.start = leafindex;
146 loc->locstring.length = nodedepth + prefixlen;
147 if(prefixlen == (Uint) (right - lptr + 1))
148 {
149 return NULL;
150 }
151 return lptr + prefixlen;
152 }
153 node = LEAFBROTHERVAL(stree->leaftab[leafindex]);
154 } else
155 {
156 nodeptr = stree->branchtab + GETBRANCHINDEX(node);
157 GETONLYHEADPOS(headposition,nodeptr);
158 leftborder = stree->text + (nodedepth + headposition);
159 edgechar = *leftborder;
160 if (edgechar > firstchar)
161 {
162 return lptr;
163 }
164 if(edgechar == firstchar)
165 {
166 /*@innerbreak@*/ break;
167 }
168 node = GETBROTHER(nodeptr);
169 }
170 }
171 }
172 GETONLYDEPTH(tmpnodedepth,nodeptr);
173 edgelen = tmpnodedepth - nodedepth;
174 if(remainingtoskip > 0)
175 {
176 if(remainingtoskip >= edgelen)
177 {
178 prefixlen = edgelen;
179 remainingtoskip -= prefixlen;
180 } else
181 {
182 NOTSUPPOSEDTOBENULL(leftborder);
183 prefixlen = remainingtoskip +
184 lcp(lptr+remainingtoskip,right,
185 leftborder+remainingtoskip,leftborder+edgelen-1);
186 remainingtoskip = 0;
187 }
188 } else
189 {
190 NOTSUPPOSEDTOBENULL(leftborder);
191 prefixlen = 1 + lcp(lptr+1,right,
192 leftborder+1,leftborder+edgelen-1);
193 }
194 loc->nextnode.toleaf = False;
195 loc->locstring.start = headposition;
196 loc->locstring.length = nodedepth + prefixlen;
197 if(prefixlen == edgelen)
198 {
199 lptr += edgelen;
200 nodedepth += edgelen;
201 loc->nextnode.address = nodeptr;
202 loc->remain = 0;
203 } else
204 {
205 loc->firstptr = leftborder;
206 loc->previousnode = loc->nextnode.address;
207 loc->nextnode.address = nodeptr;
208 loc->edgelen = edgelen;
209 loc->remain = loc->edgelen - prefixlen;
210 if(prefixlen == (Uint) (right - lptr + 1))
211 {
212 return NULL;
213 }
214 return lptr + prefixlen;
215 }
216 }
217 }
218
scanprefixstree(Suffixtree * stree,Location * outloc,Location * inloc,SYMBOL * left,SYMBOL * right,Uint rescanlength)219 /*@null@*/ SYMBOL *scanprefixstree(Suffixtree *stree,Location *outloc,
220 Location *inloc,SYMBOL *left,
221 SYMBOL *right,Uint rescanlength)
222 {
223 Uint prefixlen, remainingtoskip;
224
225 DEBUG0(4,"scanprefixstree starts at location ");
226 DEBUGCODE(4,showlocation(stdout,stree,inloc));
227 DEBUG0(4,"\n");
228 if(inloc->remain == 0)
229 {
230 return scanprefixfromnodestree(stree,outloc,inloc->nextnode.address,
231 left,right,rescanlength);
232 }
233 if(rescanlength <= inloc->locstring.length)
234 {
235 remainingtoskip = 0;
236 } else
237 {
238 remainingtoskip = rescanlength - inloc->locstring.length;
239 }
240 if(inloc->nextnode.toleaf)
241 {
242
243 if(remainingtoskip > 0)
244 {
245 prefixlen = remainingtoskip +
246 lcp(left+remainingtoskip,right,
247 inloc->firstptr+(inloc->edgelen-inloc->remain)
248 +remainingtoskip,
249 stree->sentinel-1);
250 } else
251 {
252 prefixlen = lcp(left,right,
253 inloc->firstptr+(inloc->edgelen-inloc->remain),
254 stree->sentinel-1);
255 }
256 outloc->firstptr = inloc->firstptr;
257 outloc->edgelen = inloc->edgelen;
258 outloc->remain = inloc->remain - prefixlen;
259 outloc->previousnode = inloc->previousnode;
260 outloc->nextnode.toleaf = True;
261 outloc->nextnode.address = inloc->nextnode.address;
262 outloc->locstring.start = LEAFADDR2NUM(stree,inloc->nextnode.address);
263 outloc->locstring.length = inloc->locstring.length + prefixlen;
264 return left + prefixlen;
265 }
266 if(remainingtoskip > 0)
267 {
268 if(remainingtoskip >= inloc->remain)
269 {
270 prefixlen = inloc->remain;
271 } else
272 {
273 prefixlen = remainingtoskip +
274 lcp(left+remainingtoskip,right,
275 inloc->firstptr+(inloc->edgelen-inloc->remain)
276 +remainingtoskip,
277 inloc->firstptr+inloc->edgelen-1);
278 }
279 } else
280 {
281 prefixlen = lcp(left,right,
282 inloc->firstptr+(inloc->edgelen-inloc->remain),
283 inloc->firstptr+inloc->edgelen-1);
284 }
285 if(prefixlen < inloc->remain)
286 {
287 outloc->firstptr = inloc->firstptr;
288 outloc->edgelen = inloc->edgelen;
289 outloc->remain = inloc->remain - prefixlen;
290 outloc->previousnode = inloc->previousnode;
291 outloc->nextnode.toleaf = False;
292 outloc->nextnode.address = inloc->nextnode.address;
293 outloc->locstring.start = inloc->locstring.start;
294 outloc->locstring.length = inloc->locstring.length + prefixlen;
295 return left + prefixlen;
296 }
297 return scanprefixfromnodestree(stree,outloc,inloc->nextnode.address,
298 left+prefixlen,right,rescanlength);
299 }
300
findprefixpathfromnodestree(Suffixtree * stree,ArrayPathinfo * path,Location * loc,Bref btptr,SYMBOL * left,SYMBOL * right,Uint rescanlength)301 /*@null@*/SYMBOL *findprefixpathfromnodestree(Suffixtree *stree,
302 ArrayPathinfo *path,
303 Location *loc,
304 Bref btptr,
305 SYMBOL *left,
306 SYMBOL *right,
307 Uint rescanlength)
308 {
309 Uint *nodeptr = NULL, *largeptr = NULL, leafindex, nodedepth,
310 edgelen, node, distance = 0, prefixlen, headposition,
311 remainingtoskip, tmpnodedepth;
312 SYMBOL *leftborder = (SYMBOL *) NULL, *lptr, firstchar, edgechar = 0;
313
314 lptr = left;
315 nodeptr = btptr;
316 if(nodeptr == stree->branchtab)
317 {
318 nodedepth = 0;
319 headposition = 0;
320 } else
321 {
322 GETBOTH(nodedepth,headposition,nodeptr);
323 }
324 loc->nextnode.toleaf = False;
325 loc->nextnode.address = nodeptr;
326 loc->locstring.start = headposition;
327 loc->locstring.length = nodedepth;
328 loc->remain = 0;
329 if(rescanlength <= nodedepth)
330 {
331 remainingtoskip = 0;
332 } else
333 {
334 remainingtoskip = rescanlength - nodedepth;
335 }
336 while(True)
337 {
338 if(lptr > right) // check for empty word
339 {
340 return NULL;
341 }
342 firstchar = *lptr;
343 if(nodeptr == stree->branchtab) // at the root
344 {
345 if((node = stree->rootchildren[(Uint) firstchar]) == UNDEFINEDREFERENCE)
346 {
347 return lptr;
348 }
349 if(ISLEAF(node))
350 {
351 leafindex = GETLEAFINDEX(node);
352 loc->firstptr = stree->text + leafindex;
353 if(remainingtoskip > 0)
354 {
355 prefixlen = remainingtoskip +
356 lcp(lptr+remainingtoskip,right,
357 loc->firstptr+remainingtoskip,stree->sentinel-1);
358 } else
359 {
360 prefixlen = 1 + lcp(lptr+1,right,
361 loc->firstptr+1,stree->sentinel-1);
362 }
363 loc->previousnode = stree->branchtab;
364 loc->edgelen = stree->textlen - leafindex + 1;
365 loc->remain = loc->edgelen - prefixlen;
366 loc->nextnode.toleaf = True;
367 loc->nextnode.address = stree->leaftab + leafindex;
368 loc->locstring.start = leafindex;
369 loc->locstring.length = prefixlen;
370 if(prefixlen == (Uint) (right - lptr + 1))
371 {
372 return NULL;
373 }
374 return lptr + prefixlen;
375 }
376 nodeptr = stree->branchtab + GETBRANCHINDEX(node);
377 GETONLYHEADPOS(headposition,nodeptr);
378 leftborder = stree->text + headposition;
379 } else
380 {
381 node = GETCHILD(nodeptr);
382 while(True)
383 {
384 if(NILPTR(node))
385 {
386 return lptr;
387 }
388 if(ISLEAF(node))
389 {
390 leafindex = GETLEAFINDEX(node);
391 leftborder = stree->text + (nodedepth + leafindex);
392 if(leftborder == stree->sentinel)
393 {
394 return lptr;
395 }
396 edgechar = *leftborder;
397 if(edgechar > firstchar)
398 {
399 return lptr;
400 }
401 if(edgechar == firstchar)
402 {
403 if(remainingtoskip > 0)
404 {
405 prefixlen = remainingtoskip +
406 lcp(lptr+remainingtoskip,right,
407 leftborder+remainingtoskip,stree->sentinel-1);
408 } else
409 {
410 prefixlen = 1 + lcp(lptr+1,right,
411 leftborder+1,stree->sentinel-1);
412 }
413 loc->firstptr = leftborder;
414 loc->previousnode = loc->nextnode.address;
415 loc->edgelen = stree->textlen - (nodedepth + leafindex) + 1;
416 loc->remain = loc->edgelen - prefixlen;
417 loc->nextnode.toleaf = True;
418 loc->nextnode.address = stree->leaftab + leafindex;
419 loc->locstring.start = leafindex;
420 loc->locstring.length = nodedepth + prefixlen;
421 if(prefixlen == (Uint) (right - lptr + 1))
422 {
423 return NULL;
424 }
425 return lptr + prefixlen;
426 }
427 node = LEAFBROTHERVAL(stree->leaftab[leafindex]);
428 } else
429 {
430 nodeptr = stree->branchtab + GETBRANCHINDEX(node);
431 GETONLYHEADPOS(headposition,nodeptr);
432 leftborder = stree->text + (nodedepth + headposition);
433 edgechar = *leftborder;
434 if (edgechar > firstchar)
435 {
436 return lptr;
437 }
438 if(edgechar == firstchar)
439 {
440 /*@innerbreak@*/ break;
441 }
442 node = GETBROTHER(nodeptr);
443 }
444 }
445 }
446 GETONLYDEPTH(tmpnodedepth,nodeptr);
447 edgelen = tmpnodedepth - nodedepth;
448 if(remainingtoskip > 0)
449 {
450 if(remainingtoskip >= edgelen)
451 {
452 prefixlen = edgelen;
453 remainingtoskip -= prefixlen;
454 } else
455 {
456 NOTSUPPOSEDTOBENULL(leftborder);
457 prefixlen = remainingtoskip +
458 lcp(lptr+remainingtoskip,right,
459 leftborder+remainingtoskip,leftborder+edgelen-1);
460 remainingtoskip = 0;
461 }
462 } else
463 {
464 NOTSUPPOSEDTOBENULL(leftborder);
465 prefixlen = 1 + lcp(lptr+1,right,
466 leftborder+1,leftborder+edgelen-1);
467 }
468 loc->nextnode.toleaf = False;
469 loc->locstring.start = headposition;
470 loc->locstring.length = nodedepth + prefixlen;
471 if(prefixlen == edgelen)
472 {
473 lptr += edgelen;
474 nodedepth += edgelen;
475 loc->nextnode.address = nodeptr;
476 loc->remain = 0;
477 } else
478 {
479 loc->firstptr = leftborder;
480 loc->previousnode = loc->nextnode.address;
481 loc->nextnode.address = nodeptr;
482 loc->edgelen = edgelen;
483 loc->remain = loc->edgelen - prefixlen;
484 if(prefixlen == (Uint) (right - lptr + 1))
485 {
486 return NULL;
487 }
488 return lptr + prefixlen;
489 }
490 CHECKARRAYSPACE(path,Pathinfo,128);
491 path->spacePathinfo[path->nextfreePathinfo].ref = nodeptr;
492 path->spacePathinfo[path->nextfreePathinfo].depth = tmpnodedepth;
493 path->spacePathinfo[path->nextfreePathinfo].headposition = headposition;
494 path->nextfreePathinfo++;
495 }
496 }
497
findprefixpathstree(Suffixtree * stree,ArrayPathinfo * path,Location * outloc,Location * inloc,SYMBOL * left,SYMBOL * right,Uint rescanlength)498 /*@null@*/ SYMBOL *findprefixpathstree(Suffixtree *stree,
499 ArrayPathinfo *path,
500 Location *outloc,
501 Location *inloc,
502 SYMBOL *left,
503 SYMBOL *right,
504 Uint rescanlength)
505 {
506 Uint prefixlen, remainingtoskip;
507
508 DEBUG0(4,"findprefixpathstree starts at location ");
509 DEBUGCODE(4,showlocation(stdout,stree,inloc));
510 DEBUG0(4,"\n");
511 if(inloc->remain == 0)
512 {
513 CHECKARRAYSPACE(path,Pathinfo,128);
514 path->spacePathinfo[path->nextfreePathinfo].ref
515 = inloc->nextnode.address;
516 path->spacePathinfo[path->nextfreePathinfo].depth
517 = inloc->locstring.length;
518 path->spacePathinfo[path->nextfreePathinfo].headposition
519 = inloc->locstring.start;
520 path->nextfreePathinfo++;
521 return findprefixpathfromnodestree(stree,path,outloc,
522 inloc->nextnode.address,
523 left,right,rescanlength);
524 }
525 if(rescanlength <= inloc->locstring.length)
526 {
527 remainingtoskip = 0;
528 } else
529 {
530 remainingtoskip = rescanlength - inloc->locstring.length;
531 }
532 if(inloc->nextnode.toleaf)
533 {
534 if(remainingtoskip > 0)
535 {
536 prefixlen = remainingtoskip +
537 lcp(left+remainingtoskip,right,
538 inloc->firstptr+(inloc->edgelen-inloc->remain)
539 +remainingtoskip,
540 stree->sentinel-1);
541 } else
542 {
543 prefixlen = lcp(left,right,
544 inloc->firstptr+(inloc->edgelen-inloc->remain),
545 stree->sentinel-1);
546 }
547 outloc->firstptr = inloc->firstptr;
548 outloc->edgelen = inloc->edgelen;
549 outloc->remain = inloc->remain - prefixlen;
550 outloc->previousnode = inloc->previousnode;
551 outloc->nextnode.toleaf = True;
552 outloc->nextnode.address = inloc->nextnode.address;
553 outloc->locstring.start = LEAFADDR2NUM(stree,inloc->nextnode.address);
554 outloc->locstring.length = inloc->locstring.length + prefixlen;
555 return left + prefixlen;
556 }
557 if(remainingtoskip > 0)
558 {
559 if(remainingtoskip >= inloc->remain)
560 {
561 prefixlen = inloc->remain;
562 } else
563 {
564 prefixlen = remainingtoskip +
565 lcp(left+remainingtoskip,right,
566 inloc->firstptr+(inloc->edgelen-inloc->remain)
567 +remainingtoskip,
568 inloc->firstptr+inloc->edgelen-1);
569 }
570 } else
571 {
572 prefixlen = lcp(left,right,
573 inloc->firstptr+(inloc->edgelen-inloc->remain),
574 inloc->firstptr+inloc->edgelen-1);
575 }
576 if(prefixlen < inloc->remain)
577 {
578 outloc->firstptr = inloc->firstptr;
579 outloc->edgelen = inloc->edgelen;
580 outloc->remain = inloc->remain - prefixlen;
581 outloc->previousnode = inloc->previousnode;
582 outloc->nextnode.toleaf = False;
583 outloc->nextnode.address = inloc->nextnode.address;
584 outloc->locstring.start = inloc->locstring.start;
585 outloc->locstring.length = inloc->locstring.length + prefixlen;
586 return left + prefixlen;
587 }
588 CHECKARRAYSPACE(path,Pathinfo,128);
589 path->spacePathinfo[path->nextfreePathinfo].ref = inloc->nextnode.address;
590 path->spacePathinfo[path->nextfreePathinfo].depth
591 = inloc->locstring.length + prefixlen;
592 path->spacePathinfo[path->nextfreePathinfo].headposition
593 = inloc->locstring.start;
594 path->nextfreePathinfo++;
595 return findprefixpathfromnodestree(stree,path,outloc,
596 inloc->nextnode.address,
597 left+prefixlen,right,rescanlength);
598 }
599