1 /*
2  * Copyright (c) 1993-1994 by Xerox Corporation.  All rights reserved.
3  *
4  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
5  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
6  *
7  * Permission is hereby granted to use or copy this program
8  * for any purpose,  provided the above notices are retained on all copies.
9  * Permission to modify the code and to distribute modified code is granted,
10  * provided the above notices are retained, and a notice that the code was
11  * modified is included with the above copyright notice.
12  *
13  * Author: Hans-J. Boehm (boehm@parc.xerox.com)
14  */
15 /* Boehm, October 3, 1994 5:19 pm PDT */
16 # include "gc.h"
17 # include "cord.h"
18 # include <stdlib.h>
19 # include <stdio.h>
20 # include <string.h>
21 
22 /* An implementation of the cord primitives.  These are the only 	*/
23 /* Functions that understand the representation.  We perform only	*/
24 /* minimal checks on arguments to these functions.  Out of bounds	*/
25 /* arguments to the iteration functions may result in client functions	*/
26 /* invoked on garbage data.  In most cases, client functions should be	*/
27 /* programmed defensively enough that this does not result in memory	*/
28 /* smashes.								*/
29 
30 typedef void (* oom_fn)(void);
31 
32 oom_fn CORD_oom_fn = (oom_fn) 0;
33 
34 # define OUT_OF_MEMORY {  if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
35 			  ABORT("Out of memory\n"); }
36 # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
37 
38 typedef unsigned long word;
39 
40 typedef union {
41     struct Concatenation {
42     	char null;
43 	char header;
44 	char depth;	/* concatenation nesting depth. */
45 	unsigned char left_len;
46 			/* Length of left child if it is sufficiently	*/
47 			/* short; 0 otherwise.				*/
48 #	    define MAX_LEFT_LEN 255
49 	word len;
50 	CORD left;	/* length(left) > 0	*/
51 	CORD right;	/* length(right) > 0	*/
52     } concatenation;
53     struct Function {
54 	char null;
55 	char header;
56 	char depth;	/* always 0	*/
57 	char left_len;	/* always 0	*/
58 	word len;
59 	CORD_fn fn;
60 	void * client_data;
61     } function;
62     struct Generic {
63     	char null;
64 	char header;
65 	char depth;
66 	char left_len;
67 	word len;
68     } generic;
69     char string[1];
70 } CordRep;
71 
72 # define CONCAT_HDR 1
73 
74 # define FN_HDR 4
75 # define SUBSTR_HDR 6
76 	/* Substring nodes are a special case of function nodes.  	*/
77 	/* The client_data field is known to point to a substr_args	*/
78 	/* structure, and the function is either CORD_apply_access_fn 	*/
79 	/* or CORD_index_access_fn.					*/
80 
81 /* The following may be applied only to function and concatenation nodes: */
82 #define IS_CONCATENATION(s)  (((CordRep *)s)->generic.header == CONCAT_HDR)
83 
84 #define IS_FUNCTION(s)  ((((CordRep *)s)->generic.header & FN_HDR) != 0)
85 
86 #define IS_SUBSTR(s) (((CordRep *)s)->generic.header == SUBSTR_HDR)
87 
88 #define LEN(s) (((CordRep *)s) -> generic.len)
89 #define DEPTH(s) (((CordRep *)s) -> generic.depth)
90 #define GEN_LEN(s) (CORD_IS_STRING(s) ? strlen(s) : LEN(s))
91 
92 #define LEFT_LEN(c) ((c) -> left_len != 0? \
93 				(c) -> left_len \
94 				: (CORD_IS_STRING((c) -> left) ? \
95 					(c) -> len - GEN_LEN((c) -> right) \
96 					: LEN((c) -> left)))
97 
98 #define SHORT_LIMIT (sizeof(CordRep) - 1)
99 	/* Cords shorter than this are C strings */
100 
101 
102 /* Dump the internal representation of x to stdout, with initial 	*/
103 /* indentation level n.							*/
CORD_dump_inner(CORD x,unsigned n)104 void CORD_dump_inner(CORD x, unsigned n)
105 {
106     register size_t i;
107 
108     for (i = 0; i < (size_t)n; i++) {
109         fputs("  ", stdout);
110     }
111     if (x == 0) {
112       	fputs("NIL\n", stdout);
113     } else if (CORD_IS_STRING(x)) {
114         for (i = 0; i <= SHORT_LIMIT; i++) {
115             if (x[i] == '\0') break;
116             putchar(x[i]);
117         }
118         if (x[i] != '\0') fputs("...", stdout);
119         putchar('\n');
120     } else if (IS_CONCATENATION(x)) {
121         register struct Concatenation * conc =
122         			&(((CordRep *)x) -> concatenation);
123         printf("Concatenation: %p (len: %d, depth: %d)\n",
124                x, (int)(conc -> len), (int)(conc -> depth));
125         CORD_dump_inner(conc -> left, n+1);
126         CORD_dump_inner(conc -> right, n+1);
127     } else /* function */{
128         register struct Function * func =
129         			&(((CordRep *)x) -> function);
130         if (IS_SUBSTR(x)) printf("(Substring) ");
131         printf("Function: %p (len: %d): ", x, (int)(func -> len));
132         for (i = 0; i < 20 && i < func -> len; i++) {
133             putchar((*(func -> fn))(i, func -> client_data));
134         }
135         if (i < func -> len) fputs("...", stdout);
136         putchar('\n');
137     }
138 }
139 
140 /* Dump the internal representation of x to stdout	*/
CORD_dump(CORD x)141 void CORD_dump(CORD x)
142 {
143     CORD_dump_inner(x, 0);
144     fflush(stdout);
145 }
146 
CORD_cat_char_star(CORD x,const char * y,size_t leny)147 CORD CORD_cat_char_star(CORD x, const char * y, size_t leny)
148 {
149     register size_t result_len;
150     register size_t lenx;
151     register int depth;
152 
153     if (x == CORD_EMPTY) return(y);
154     if (leny == 0) return(x);
155     if (CORD_IS_STRING(x)) {
156         lenx = strlen(x);
157         result_len = lenx + leny;
158         if (result_len <= SHORT_LIMIT) {
159             register char * result = GC_MALLOC_ATOMIC(result_len+1);
160 
161             if (result == 0) OUT_OF_MEMORY;
162             memcpy(result, x, lenx);
163             memcpy(result + lenx, y, leny);
164             result[result_len] = '\0';
165             return((CORD) result);
166         } else {
167             depth = 1;
168         }
169     } else {
170     	register CORD right;
171     	register CORD left;
172     	register char * new_right;
173     	register size_t right_len;
174 
175     	lenx = LEN(x);
176 
177         if (leny <= SHORT_LIMIT/2
178     	    && IS_CONCATENATION(x)
179             && CORD_IS_STRING(right = ((CordRep *)x) -> concatenation.right)) {
180             /* Merge y into right part of x. */
181             if (!CORD_IS_STRING(left = ((CordRep *)x) -> concatenation.left)) {
182             	right_len = lenx - LEN(left);
183             } else if (((CordRep *)x) -> concatenation.left_len != 0) {
184                 right_len = lenx - ((CordRep *)x) -> concatenation.left_len;
185             } else {
186             	right_len = strlen(right);
187             }
188             result_len = right_len + leny;  /* length of new_right */
189             if (result_len <= SHORT_LIMIT) {
190             	new_right = GC_MALLOC_ATOMIC(result_len + 1);
191             	memcpy(new_right, right, right_len);
192             	memcpy(new_right + right_len, y, leny);
193             	new_right[result_len] = '\0';
194             	y = new_right;
195             	leny = result_len;
196             	x = left;
197             	lenx -= right_len;
198             	/* Now fall through to concatenate the two pieces: */
199             }
200             if (CORD_IS_STRING(x)) {
201                 depth = 1;
202             } else {
203                 depth = DEPTH(x) + 1;
204             }
205         } else {
206             depth = DEPTH(x) + 1;
207         }
208         result_len = lenx + leny;
209     }
210     {
211       /* The general case; lenx, result_len is known: */
212     	register struct Concatenation * result;
213 
214     	result = GC_NEW(struct Concatenation);
215     	if (result == 0) OUT_OF_MEMORY;
216     	result->header = CONCAT_HDR;
217     	result->depth = depth;
218     	if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
219     	result->len = result_len;
220     	result->left = x;
221     	result->right = y;
222     	if (depth >= MAX_DEPTH) {
223     	    return(CORD_balance((CORD)result));
224     	} else {
225     	    return((CORD) result);
226     	}
227     }
228 }
229 
230 
CORD_cat(CORD x,CORD y)231 CORD CORD_cat(CORD x, CORD y)
232 {
233     register size_t result_len;
234     register int depth;
235     register size_t lenx;
236 
237     if (x == CORD_EMPTY) return(y);
238     if (y == CORD_EMPTY) return(x);
239     if (CORD_IS_STRING(y)) {
240         return(CORD_cat_char_star(x, y, strlen(y)));
241     } else if (CORD_IS_STRING(x)) {
242         lenx = strlen(x);
243         depth = DEPTH(y) + 1;
244     } else {
245         register int depthy = DEPTH(y);
246 
247         lenx = LEN(x);
248         depth = DEPTH(x) + 1;
249         if (depthy >= depth) depth = depthy + 1;
250     }
251     result_len = lenx + LEN(y);
252     {
253     	register struct Concatenation * result;
254 
255     	result = GC_NEW(struct Concatenation);
256     	if (result == 0) OUT_OF_MEMORY;
257     	result->header = CONCAT_HDR;
258     	result->depth = depth;
259     	if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
260     	result->len = result_len;
261     	result->left = x;
262     	result->right = y;
263     	if (depth >= MAX_DEPTH) {
264     	    return(CORD_balance((CORD)result));
265     	} else {
266     	    return((CORD) result);
267     	}
268     }
269 }
270 
271 
272 
CORD_from_fn(CORD_fn fn,void * client_data,size_t len)273 CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len)
274 {
275     if (len <= 0) return(0);
276     if (len <= SHORT_LIMIT) {
277         register char * result;
278         register size_t i;
279         char buf[SHORT_LIMIT+1];
280         register char c;
281 
282         for (i = 0; i < len; i++) {
283             c = (*fn)(i, client_data);
284             if (c == '\0') goto gen_case;
285             buf[i] = c;
286         }
287         buf[i] = '\0';
288         result = GC_MALLOC_ATOMIC(len+1);
289         if (result == 0) OUT_OF_MEMORY;
290         strcpy(result, buf);
291         result[len] = '\0';
292         return((CORD) result);
293     }
294   gen_case:
295     {
296     	register struct Function * result;
297 
298     	result = GC_NEW(struct Function);
299     	if (result == 0) OUT_OF_MEMORY;
300     	result->header = FN_HDR;
301     	/* depth is already 0 */
302     	result->len = len;
303     	result->fn = fn;
304     	result->client_data = client_data;
305     	return((CORD) result);
306     }
307 }
308 
CORD_len(CORD x)309 size_t CORD_len(CORD x)
310 {
311     if (x == 0) {
312      	return(0);
313     } else {
314 	return(GEN_LEN(x));
315     }
316 }
317 
318 struct substr_args {
319     CordRep * sa_cord;
320     size_t sa_index;
321 };
322 
CORD_index_access_fn(size_t i,void * client_data)323 char CORD_index_access_fn(size_t i, void * client_data)
324 {
325     register struct substr_args *descr = (struct substr_args *)client_data;
326 
327     return(((char *)(descr->sa_cord))[i + descr->sa_index]);
328 }
329 
CORD_apply_access_fn(size_t i,void * client_data)330 char CORD_apply_access_fn(size_t i, void * client_data)
331 {
332     register struct substr_args *descr = (struct substr_args *)client_data;
333     register struct Function * fn_cord = &(descr->sa_cord->function);
334 
335     return((*(fn_cord->fn))(i + descr->sa_index, fn_cord->client_data));
336 }
337 
338 /* A version of CORD_substr that simply returns a function node, thus	*/
339 /* postponing its work.	The fourth argument is a function that may	*/
340 /* be used for efficient access to the ith character.			*/
341 /* Assumes i >= 0 and i + n < length(x).				*/
CORD_substr_closure(CORD x,size_t i,size_t n,CORD_fn f)342 CORD CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f)
343 {
344     register struct substr_args * sa = GC_NEW(struct substr_args);
345     CORD result;
346 
347     if (sa == 0) OUT_OF_MEMORY;
348     sa->sa_cord = (CordRep *)x;
349     sa->sa_index = i;
350     result = CORD_from_fn(f, (void *)sa, n);
351     ((CordRep *)result) -> function.header = SUBSTR_HDR;
352     return (result);
353 }
354 
355 # define SUBSTR_LIMIT (10 * SHORT_LIMIT)
356 	/* Substrings of function nodes and flat strings shorter than 	*/
357 	/* this are flat strings.  Othewise we use a functional 	*/
358 	/* representation, which is significantly slower to access.	*/
359 
360 /* A version of CORD_substr that assumes i >= 0, n > 0, and i + n < length(x).*/
CORD_substr_checked(CORD x,size_t i,size_t n)361 CORD CORD_substr_checked(CORD x, size_t i, size_t n)
362 {
363     if (CORD_IS_STRING(x)) {
364         if (n > SUBSTR_LIMIT) {
365             return(CORD_substr_closure(x, i, n, CORD_index_access_fn));
366         } else {
367             register char * result = GC_MALLOC_ATOMIC(n+1);
368 
369             if (result == 0) OUT_OF_MEMORY;
370             strncpy(result, x+i, n);
371             result[n] = '\0';
372             return(result);
373         }
374     } else if (IS_CONCATENATION(x)) {
375     	register struct Concatenation * conc
376     			= &(((CordRep *)x) -> concatenation);
377     	register size_t left_len;
378     	register size_t right_len;
379 
380     	left_len = LEFT_LEN(conc);
381     	right_len = conc -> len - left_len;
382     	if (i >= left_len) {
383     	    if (n == right_len) return(conc -> right);
384     	    return(CORD_substr_checked(conc -> right, i - left_len, n));
385     	} else if (i+n <= left_len) {
386     	    if (n == left_len) return(conc -> left);
387     	    return(CORD_substr_checked(conc -> left, i, n));
388     	} else {
389     	    /* Need at least one character from each side. */
390     	    register CORD left_part;
391     	    register CORD right_part;
392     	    register size_t left_part_len = left_len - i;
393 
394     	    if (i == 0) {
395     	        left_part = conc -> left;
396     	    } else {
397     	        left_part = CORD_substr_checked(conc -> left, i, left_part_len);
398     	    }
399     	    if (i + n == right_len + left_len) {
400     	         right_part = conc -> right;
401     	    } else {
402     	         right_part = CORD_substr_checked(conc -> right, 0,
403     	    				          n - left_part_len);
404     	    }
405     	    return(CORD_cat(left_part, right_part));
406     	}
407     } else /* function */ {
408         if (n > SUBSTR_LIMIT) {
409             if (IS_SUBSTR(x)) {
410             	/* Avoid nesting substring nodes.	*/
411             	register struct Function * f = &(((CordRep *)x) -> function);
412             	register struct substr_args *descr =
413             			(struct substr_args *)(f -> client_data);
414 
415             	return(CORD_substr_closure((CORD)descr->sa_cord,
416             				   i + descr->sa_index,
417             				   n, f -> fn));
418             } else {
419                 return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
420             }
421         } else {
422             char * result;
423             register struct Function * f = &(((CordRep *)x) -> function);
424             char buf[SUBSTR_LIMIT+1];
425             register char * p = buf;
426             register char c;
427             register int j;
428             register int lim = i + n;
429 
430             for (j = i; j < lim; j++) {
431             	c = (*(f -> fn))(j, f -> client_data);
432             	if (c == '\0') {
433             	    return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
434             	}
435             	*p++ = c;
436             }
437             *p = '\0';
438             result = GC_MALLOC_ATOMIC(n+1);
439             if (result == 0) OUT_OF_MEMORY;
440             strcpy(result, buf);
441             return(result);
442         }
443     }
444 }
445 
CORD_substr(CORD x,size_t i,size_t n)446 CORD CORD_substr(CORD x, size_t i, size_t n)
447 {
448     register size_t len = CORD_len(x);
449 
450     if (i >= len || n <= 0) return(0);
451     	/* n < 0 is impossible in a correct C implementation, but	*/
452     	/* quite possible  under SunOS 4.X.				*/
453     if (i + n > len) n = len - i;
454 #   ifndef __STDC__
455       if (i < 0) ABORT("CORD_substr: second arg. negative");
456     	/* Possible only if both client and C implementation are buggy.	*/
457     	/* But empirically this happens frequently.			*/
458 #   endif
459     return(CORD_substr_checked(x, i, n));
460 }
461 
462 /* See cord.h for definition.  We assume i is in range.	*/
CORD_iter5(CORD x,size_t i,CORD_iter_fn f1,CORD_batched_iter_fn f2,void * client_data)463 int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
464 			 CORD_batched_iter_fn f2, void * client_data)
465 {
466     if (x == 0) return(0);
467     if (CORD_IS_STRING(x)) {
468     	register const char *p = x+i;
469 
470     	if (*p == '\0') ABORT("2nd arg to CORD_iter5 too big");
471         if (f2 != CORD_NO_FN) {
472             return((*f2)(p, client_data));
473         } else {
474 	    while (*p) {
475                 if ((*f1)(*p, client_data)) return(1);
476                 p++;
477 	    }
478 	    return(0);
479         }
480     } else if (IS_CONCATENATION(x)) {
481     	register struct Concatenation * conc
482     			= &(((CordRep *)x) -> concatenation);
483 
484 
485     	if (i > 0) {
486     	    register size_t left_len = LEFT_LEN(conc);
487 
488     	    if (i >= left_len) {
489     	        return(CORD_iter5(conc -> right, i - left_len, f1, f2,
490     	        		  client_data));
491     	    }
492     	}
493     	if (CORD_iter5(conc -> left, i, f1, f2, client_data)) {
494     	    return(1);
495     	}
496     	return(CORD_iter5(conc -> right, 0, f1, f2, client_data));
497     } else /* function */ {
498         register struct Function * f = &(((CordRep *)x) -> function);
499         register size_t j;
500         register size_t lim = f -> len;
501 
502         for (j = i; j < lim; j++) {
503             if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
504                 return(1);
505             }
506         }
507         return(0);
508     }
509 }
510 
511 #undef CORD_iter
CORD_iter(CORD x,CORD_iter_fn f1,void * client_data)512 int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data)
513 {
514     return(CORD_iter5(x, 0, f1, CORD_NO_FN, client_data));
515 }
516 
CORD_riter4(CORD x,size_t i,CORD_iter_fn f1,void * client_data)517 int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data)
518 {
519     if (x == 0) return(0);
520     if (CORD_IS_STRING(x)) {
521 	register const char *p = x + i;
522 	register char c;
523 
524 	for(;;) {
525 	    c = *p;
526 	    if (c == '\0') ABORT("2nd arg to CORD_riter4 too big");
527             if ((*f1)(c, client_data)) return(1);
528 	    if (p == x) break;
529             p--;
530 	}
531 	return(0);
532     } else if (IS_CONCATENATION(x)) {
533     	register struct Concatenation * conc
534     			= &(((CordRep *)x) -> concatenation);
535     	register CORD left_part = conc -> left;
536     	register size_t left_len;
537 
538     	left_len = LEFT_LEN(conc);
539     	if (i >= left_len) {
540     	    if (CORD_riter4(conc -> right, i - left_len, f1, client_data)) {
541     	    	return(1);
542     	    }
543     	    return(CORD_riter4(left_part, left_len - 1, f1, client_data));
544     	} else {
545     	    return(CORD_riter4(left_part, i, f1, client_data));
546     	}
547     } else /* function */ {
548         register struct Function * f = &(((CordRep *)x) -> function);
549         register size_t j;
550 
551         for (j = i; ; j--) {
552             if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
553                 return(1);
554             }
555             if (j == 0) return(0);
556         }
557     }
558 }
559 
CORD_riter(CORD x,CORD_iter_fn f1,void * client_data)560 int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data)
561 {
562     return(CORD_riter4(x, CORD_len(x) - 1, f1, client_data));
563 }
564 
565 /*
566  * The following functions are concerned with balancing cords.
567  * Strategy:
568  * Scan the cord from left to right, keeping the cord scanned so far
569  * as a forest of balanced trees of exponentialy decreasing length.
570  * When a new subtree needs to be added to the forest, we concatenate all
571  * shorter ones to the new tree in the appropriate order, and then insert
572  * the result into the forest.
573  * Crucial invariants:
574  * 1. The concatenation of the forest (in decreasing order) with the
575  *     unscanned part of the rope is equal to the rope being balanced.
576  * 2. All trees in the forest are balanced.
577  * 3. forest[i] has depth at most i.
578  */
579 
580 typedef struct {
581     CORD c;
582     size_t len;		/* Actual length of c 	*/
583 } ForestElement;
584 
585 static size_t min_len [ MAX_DEPTH ];
586 
587 static int min_len_init = 0;
588 
589 int CORD_max_len;
590 
591 typedef ForestElement Forest [ MAX_DEPTH ];
592 			/* forest[i].len >= fib(i+1)	        */
593 			/* The string is the concatenation	*/
594 			/* of the forest in order of DECREASING */
595 			/* indices.				*/
596 
CORD_init_min_len()597 void CORD_init_min_len()
598 {
599     register int i;
600     register size_t last, previous, current;
601 
602     min_len[0] = previous = 1;
603     min_len[1] = last = 2;
604     for (i = 2; i < MAX_DEPTH; i++) {
605     	current = last + previous;
606     	if (current < last) /* overflow */ current = last;
607     	min_len[i] = current;
608     	previous = last;
609     	last = current;
610     }
611     CORD_max_len = last - 1;
612     min_len_init = 1;
613 }
614 
615 
CORD_init_forest(ForestElement * forest,size_t max_len)616 void CORD_init_forest(ForestElement * forest, size_t max_len)
617 {
618     register int i;
619 
620     for (i = 0; i < MAX_DEPTH; i++) {
621     	forest[i].c = 0;
622     	if (min_len[i] > max_len) return;
623     }
624     ABORT("Cord too long");
625 }
626 
627 /* Add a leaf to the appropriate level in the forest, cleaning		*/
628 /* out lower levels as necessary.					*/
629 /* Also works if x is a balanced tree of concatenations; however	*/
630 /* in this case an extra concatenation node may be inserted above x;	*/
631 /* This node should not be counted in the statement of the invariants.	*/
CORD_add_forest(ForestElement * forest,CORD x,size_t len)632 void CORD_add_forest(ForestElement * forest, CORD x, size_t len)
633 {
634     register int i = 0;
635     register CORD sum = CORD_EMPTY;
636     register size_t sum_len = 0;
637 
638     while (len > min_len[i + 1]) {
639     	if (forest[i].c != 0) {
640     	    sum = CORD_cat(forest[i].c, sum);
641     	    sum_len += forest[i].len;
642     	    forest[i].c = 0;
643     	}
644         i++;
645     }
646     /* Sum has depth at most 1 greter than what would be required 	*/
647     /* for balance.							*/
648     sum = CORD_cat(sum, x);
649     sum_len += len;
650     /* If x was a leaf, then sum is now balanced.  To see this		*/
651     /* consider the two cases in which forest[i-1] either is or is 	*/
652     /* not empty.							*/
653     while (sum_len >= min_len[i]) {
654     	if (forest[i].c != 0) {
655     	    sum = CORD_cat(forest[i].c, sum);
656     	    sum_len += forest[i].len;
657     	    /* This is again balanced, since sum was balanced, and has	*/
658     	    /* allowable depth that differs from i by at most 1.	*/
659     	    forest[i].c = 0;
660     	}
661         i++;
662     }
663     i--;
664     forest[i].c = sum;
665     forest[i].len = sum_len;
666 }
667 
CORD_concat_forest(ForestElement * forest,size_t expected_len)668 CORD CORD_concat_forest(ForestElement * forest, size_t expected_len)
669 {
670     register int i = 0;
671     CORD sum = 0;
672     size_t sum_len = 0;
673 
674     while (sum_len != expected_len) {
675     	if (forest[i].c != 0) {
676     	    sum = CORD_cat(forest[i].c, sum);
677     	    sum_len += forest[i].len;
678     	}
679         i++;
680     }
681     return(sum);
682 }
683 
684 /* Insert the frontier of x into forest.  Balanced subtrees are	*/
685 /* treated as leaves.  This potentially adds one to the depth	*/
686 /* of the final tree.						*/
CORD_balance_insert(CORD x,size_t len,ForestElement * forest)687 void CORD_balance_insert(CORD x, size_t len, ForestElement * forest)
688 {
689     register int depth;
690 
691     if (CORD_IS_STRING(x)) {
692         CORD_add_forest(forest, x, len);
693     } else if (IS_CONCATENATION(x)
694                && ((depth = DEPTH(x)) >= MAX_DEPTH
695                    || len < min_len[depth])) {
696     	register struct Concatenation * conc
697     			= &(((CordRep *)x) -> concatenation);
698     	size_t left_len = LEFT_LEN(conc);
699 
700     	CORD_balance_insert(conc -> left, left_len, forest);
701     	CORD_balance_insert(conc -> right, len - left_len, forest);
702     } else /* function or balanced */ {
703     	CORD_add_forest(forest, x, len);
704     }
705 }
706 
707 
CORD_balance(CORD x)708 CORD CORD_balance(CORD x)
709 {
710     Forest forest;
711     register size_t len;
712 
713     if (x == 0) return(0);
714     if (CORD_IS_STRING(x)) return(x);
715     if (!min_len_init) CORD_init_min_len();
716     len = LEN(x);
717     CORD_init_forest(forest, len);
718     CORD_balance_insert(x, len, forest);
719     return(CORD_concat_forest(forest, len));
720 }
721 
722 
723 /* Position primitives	*/
724 
725 /* Private routines to deal with the hard cases only: */
726 
727 /* P contains a prefix of the  path to cur_pos.	Extend it to a full	*/
728 /* path and set up leaf info.						*/
729 /* Return 0 if past the end of cord, 1 o.w.				*/
CORD__extend_path(register CORD_pos p)730 void CORD__extend_path(register CORD_pos p)
731 {
732      register struct CORD_pe * current_pe = &(p[0].path[p[0].path_len]);
733      register CORD top = current_pe -> pe_cord;
734      register size_t pos = p[0].cur_pos;
735      register size_t top_pos = current_pe -> pe_start_pos;
736      register size_t top_len = GEN_LEN(top);
737 
738      /* Fill in the rest of the path. */
739        while(!CORD_IS_STRING(top) && IS_CONCATENATION(top)) {
740      	 register struct Concatenation * conc =
741      	 		&(((CordRep *)top) -> concatenation);
742      	 register size_t left_len;
743 
744      	 left_len = LEFT_LEN(conc);
745      	 current_pe++;
746      	 if (pos >= top_pos + left_len) {
747      	     current_pe -> pe_cord = top = conc -> right;
748      	     current_pe -> pe_start_pos = top_pos = top_pos + left_len;
749      	     top_len -= left_len;
750      	 } else {
751      	     current_pe -> pe_cord = top = conc -> left;
752      	     current_pe -> pe_start_pos = top_pos;
753      	     top_len = left_len;
754      	 }
755      	 p[0].path_len++;
756        }
757      /* Fill in leaf description for fast access. */
758        if (CORD_IS_STRING(top)) {
759          p[0].cur_leaf = top;
760          p[0].cur_start = top_pos;
761          p[0].cur_end = top_pos + top_len;
762        } else {
763          p[0].cur_end = 0;
764        }
765        if (pos >= top_pos + top_len) p[0].path_len = CORD_POS_INVALID;
766 }
767 
CORD__pos_fetch(register CORD_pos p)768 char CORD__pos_fetch(register CORD_pos p)
769 {
770     /* Leaf is a function node */
771     struct CORD_pe * pe = &((p)[0].path[(p)[0].path_len]);
772     CORD leaf = pe -> pe_cord;
773     register struct Function * f = &(((CordRep *)leaf) -> function);
774 
775     if (!IS_FUNCTION(leaf)) ABORT("CORD_pos_fetch: bad leaf");
776     return ((*(f -> fn))(p[0].cur_pos - pe -> pe_start_pos, f -> client_data));
777 }
778 
CORD__next(register CORD_pos p)779 void CORD__next(register CORD_pos p)
780 {
781     register size_t cur_pos = p[0].cur_pos + 1;
782     register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
783     register CORD leaf = current_pe -> pe_cord;
784 
785     /* Leaf is not a string or we're at end of leaf */
786     p[0].cur_pos = cur_pos;
787     if (!CORD_IS_STRING(leaf)) {
788     	/* Function leaf	*/
789     	register struct Function * f = &(((CordRep *)leaf) -> function);
790     	register size_t start_pos = current_pe -> pe_start_pos;
791     	register size_t end_pos = start_pos + f -> len;
792 
793     	if (cur_pos < end_pos) {
794     	  /* Fill cache and return. */
795     	    register size_t i;
796     	    register size_t limit = cur_pos + FUNCTION_BUF_SZ;
797     	    register CORD_fn fn = f -> fn;
798     	    register void * client_data = f -> client_data;
799 
800     	    if (limit > end_pos) {
801     	        limit = end_pos;
802     	    }
803     	    for (i = cur_pos; i < limit; i++) {
804     	        p[0].function_buf[i - cur_pos] =
805     	        	(*fn)(i - start_pos, client_data);
806     	    }
807     	    p[0].cur_start = cur_pos;
808     	    p[0].cur_leaf = p[0].function_buf;
809     	    p[0].cur_end = limit;
810     	    return;
811     	}
812     }
813     /* End of leaf	*/
814     /* Pop the stack until we find two concatenation nodes with the 	*/
815     /* same start position: this implies we were in left part.		*/
816     {
817     	while (p[0].path_len > 0
818     	       && current_pe[0].pe_start_pos != current_pe[-1].pe_start_pos) {
819     	    p[0].path_len--;
820     	    current_pe--;
821     	}
822     	if (p[0].path_len == 0) {
823 	    p[0].path_len = CORD_POS_INVALID;
824             return;
825 	}
826     }
827     p[0].path_len--;
828     CORD__extend_path(p);
829 }
830 
CORD__prev(register CORD_pos p)831 void CORD__prev(register CORD_pos p)
832 {
833     register struct CORD_pe * pe = &(p[0].path[p[0].path_len]);
834 
835     if (p[0].cur_pos == 0) {
836         p[0].path_len = CORD_POS_INVALID;
837         return;
838     }
839     p[0].cur_pos--;
840     if (p[0].cur_pos >= pe -> pe_start_pos) return;
841 
842     /* Beginning of leaf	*/
843 
844     /* Pop the stack until we find two concatenation nodes with the 	*/
845     /* different start position: this implies we were in right part.	*/
846     {
847     	register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
848 
849     	while (p[0].path_len > 0
850     	       && current_pe[0].pe_start_pos == current_pe[-1].pe_start_pos) {
851     	    p[0].path_len--;
852     	    current_pe--;
853     	}
854     }
855     p[0].path_len--;
856     CORD__extend_path(p);
857 }
858 
859 #undef CORD_pos_fetch
860 #undef CORD_next
861 #undef CORD_prev
862 #undef CORD_pos_to_index
863 #undef CORD_pos_to_cord
864 #undef CORD_pos_valid
865 
CORD_pos_fetch(register CORD_pos p)866 char CORD_pos_fetch(register CORD_pos p)
867 {
868     if (p[0].cur_start <= p[0].cur_pos && p[0].cur_pos < p[0].cur_end) {
869     	return(p[0].cur_leaf[p[0].cur_pos - p[0].cur_start]);
870     } else {
871         return(CORD__pos_fetch(p));
872     }
873 }
874 
CORD_next(CORD_pos p)875 void CORD_next(CORD_pos p)
876 {
877     if (p[0].cur_pos < p[0].cur_end - 1) {
878     	p[0].cur_pos++;
879     } else {
880     	CORD__next(p);
881     }
882 }
883 
CORD_prev(CORD_pos p)884 void CORD_prev(CORD_pos p)
885 {
886     if (p[0].cur_end != 0 && p[0].cur_pos > p[0].cur_start) {
887     	p[0].cur_pos--;
888     } else {
889     	CORD__prev(p);
890     }
891 }
892 
CORD_pos_to_index(CORD_pos p)893 size_t CORD_pos_to_index(CORD_pos p)
894 {
895     return(p[0].cur_pos);
896 }
897 
CORD_pos_to_cord(CORD_pos p)898 CORD CORD_pos_to_cord(CORD_pos p)
899 {
900     return(p[0].path[0].pe_cord);
901 }
902 
CORD_pos_valid(CORD_pos p)903 int CORD_pos_valid(CORD_pos p)
904 {
905     return(p[0].path_len != CORD_POS_INVALID);
906 }
907 
CORD_set_pos(CORD_pos p,CORD x,size_t i)908 void CORD_set_pos(CORD_pos p, CORD x, size_t i)
909 {
910     if (x == CORD_EMPTY) {
911     	p[0].path_len = CORD_POS_INVALID;
912     	return;
913     }
914     p[0].path[0].pe_cord = x;
915     p[0].path[0].pe_start_pos = 0;
916     p[0].path_len = 0;
917     p[0].cur_pos = i;
918     CORD__extend_path(p);
919 }
920