1 /*
2  * colorings of characters
3  * This file is #included by regcomp.c.
4  *
5  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
6  *
7  * Development of this software was funded, in part, by Cray Research Inc.,
8  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9  * Corporation, none of whom are responsible for the results.  The author
10  * thanks all of them.
11  *
12  * Redistribution and use in source and binary forms -- with or without
13  * modification -- are permitted for any purpose, provided that
14  * redistributions in source form retain this entire copyright notice and
15  * indicate the origin and nature of any modifications.
16  *
17  * I'd appreciate being given credit for this package in the documentation
18  * of software which uses it, but that is not a requirement.
19  *
20  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
23  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * src/backend/regex/regc_color.c
32  *
33  *
34  * Note that there are some incestuous relationships between this code and
35  * NFA arc maintenance, which perhaps ought to be cleaned up sometime.
36  */
37 
38 
39 
40 #define CISERR()	VISERR(cm->v)
41 #define CERR(e)		VERR(cm->v, (e))
42 
43 
44 
45 /*
46  * initcm - set up new colormap
47  */
48 static void
initcm(struct vars * v,struct colormap * cm)49 initcm(struct vars *v,
50 	   struct colormap *cm)
51 {
52 	struct colordesc *cd;
53 
54 	cm->magic = CMMAGIC;
55 	cm->v = v;
56 
57 	cm->ncds = NINLINECDS;
58 	cm->cd = cm->cdspace;
59 	cm->max = 0;
60 	cm->free = 0;
61 
62 	cd = cm->cd;				/* cm->cd[WHITE] */
63 	cd->nschrs = MAX_SIMPLE_CHR - CHR_MIN + 1;
64 	cd->nuchrs = 1;
65 	cd->sub = NOSUB;
66 	cd->arcs = NULL;
67 	cd->firstchr = CHR_MIN;
68 	cd->flags = 0;
69 
70 	cm->locolormap = (color *)
71 		MALLOC((MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color));
72 	if (cm->locolormap == NULL)
73 	{
74 		CERR(REG_ESPACE);
75 		cm->cmranges = NULL;	/* prevent failure during freecm */
76 		cm->hicolormap = NULL;
77 		return;
78 	}
79 	/* this memset relies on WHITE being zero: */
80 	memset(cm->locolormap, WHITE,
81 		   (MAX_SIMPLE_CHR - CHR_MIN + 1) * sizeof(color));
82 
83 	memset(cm->classbits, 0, sizeof(cm->classbits));
84 	cm->numcmranges = 0;
85 	cm->cmranges = NULL;
86 	cm->maxarrayrows = 4;		/* arbitrary initial allocation */
87 	cm->hiarrayrows = 1;		/* but we have only one row/col initially */
88 	cm->hiarraycols = 1;
89 	cm->hicolormap = (color *) MALLOC(cm->maxarrayrows * sizeof(color));
90 	if (cm->hicolormap == NULL)
91 	{
92 		CERR(REG_ESPACE);
93 		return;
94 	}
95 	/* initialize the "all other characters" row to WHITE */
96 	cm->hicolormap[0] = WHITE;
97 }
98 
99 /*
100  * freecm - free dynamically-allocated things in a colormap
101  */
102 static void
freecm(struct colormap * cm)103 freecm(struct colormap *cm)
104 {
105 	cm->magic = 0;
106 	if (cm->cd != cm->cdspace)
107 		FREE(cm->cd);
108 	if (cm->locolormap != NULL)
109 		FREE(cm->locolormap);
110 	if (cm->cmranges != NULL)
111 		FREE(cm->cmranges);
112 	if (cm->hicolormap != NULL)
113 		FREE(cm->hicolormap);
114 }
115 
116 /*
117  * pg_reg_getcolor - slow case of GETCOLOR()
118  */
119 color
pg_reg_getcolor(struct colormap * cm,chr c)120 pg_reg_getcolor(struct colormap *cm, chr c)
121 {
122 	int			rownum,
123 				colnum,
124 				low,
125 				high;
126 
127 	/* Should not be used for chrs in the locolormap */
128 	assert(c > MAX_SIMPLE_CHR);
129 
130 	/*
131 	 * Find which row it's in.  The colormapranges are in order, so we can use
132 	 * binary search.
133 	 */
134 	rownum = 0;					/* if no match, use array row zero */
135 	low = 0;
136 	high = cm->numcmranges;
137 	while (low < high)
138 	{
139 		int			middle = low + (high - low) / 2;
140 		const colormaprange *cmr = &cm->cmranges[middle];
141 
142 		if (c < cmr->cmin)
143 			high = middle;
144 		else if (c > cmr->cmax)
145 			low = middle + 1;
146 		else
147 		{
148 			rownum = cmr->rownum;	/* found a match */
149 			break;
150 		}
151 	}
152 
153 	/*
154 	 * Find which column it's in --- this is all locale-dependent.
155 	 */
156 	if (cm->hiarraycols > 1)
157 	{
158 		colnum = cclass_column_index(cm, c);
159 		return cm->hicolormap[rownum * cm->hiarraycols + colnum];
160 	}
161 	else
162 	{
163 		/* fast path if no relevant cclasses */
164 		return cm->hicolormap[rownum];
165 	}
166 }
167 
168 /*
169  * maxcolor - report largest color number in use
170  */
171 static color
maxcolor(struct colormap * cm)172 maxcolor(struct colormap *cm)
173 {
174 	if (CISERR())
175 		return COLORLESS;
176 
177 	return (color) cm->max;
178 }
179 
180 /*
181  * newcolor - find a new color (must be assigned at once)
182  * Beware:	may relocate the colordescs.
183  */
184 static color					/* COLORLESS for error */
newcolor(struct colormap * cm)185 newcolor(struct colormap *cm)
186 {
187 	struct colordesc *cd;
188 	size_t		n;
189 
190 	if (CISERR())
191 		return COLORLESS;
192 
193 	if (cm->free != 0)
194 	{
195 		assert(cm->free > 0);
196 		assert((size_t) cm->free < cm->ncds);
197 		cd = &cm->cd[cm->free];
198 		assert(UNUSEDCOLOR(cd));
199 		assert(cd->arcs == NULL);
200 		cm->free = cd->sub;
201 	}
202 	else if (cm->max < cm->ncds - 1)
203 	{
204 		cm->max++;
205 		cd = &cm->cd[cm->max];
206 	}
207 	else
208 	{
209 		/* oops, must allocate more */
210 		struct colordesc *newCd;
211 
212 		if (cm->max == MAX_COLOR)
213 		{
214 			CERR(REG_ECOLORS);
215 			return COLORLESS;	/* too many colors */
216 		}
217 
218 		n = cm->ncds * 2;
219 		if (n > MAX_COLOR + 1)
220 			n = MAX_COLOR + 1;
221 		if (cm->cd == cm->cdspace)
222 		{
223 			newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
224 			if (newCd != NULL)
225 				memcpy(VS(newCd), VS(cm->cdspace), cm->ncds *
226 					   sizeof(struct colordesc));
227 		}
228 		else
229 			newCd = (struct colordesc *)
230 				REALLOC(cm->cd, n * sizeof(struct colordesc));
231 		if (newCd == NULL)
232 		{
233 			CERR(REG_ESPACE);
234 			return COLORLESS;
235 		}
236 		cm->cd = newCd;
237 		cm->ncds = n;
238 		assert(cm->max < cm->ncds - 1);
239 		cm->max++;
240 		cd = &cm->cd[cm->max];
241 	}
242 
243 	cd->nschrs = 0;
244 	cd->nuchrs = 0;
245 	cd->sub = NOSUB;
246 	cd->arcs = NULL;
247 	cd->firstchr = CHR_MIN;		/* in case never set otherwise */
248 	cd->flags = 0;
249 
250 	return (color) (cd - cm->cd);
251 }
252 
253 /*
254  * freecolor - free a color (must have no arcs or subcolor)
255  */
256 static void
freecolor(struct colormap * cm,color co)257 freecolor(struct colormap *cm,
258 		  color co)
259 {
260 	struct colordesc *cd = &cm->cd[co];
261 	color		pco,
262 				nco;			/* for freelist scan */
263 
264 	assert(co >= 0);
265 	if (co == WHITE)
266 		return;
267 
268 	assert(cd->arcs == NULL);
269 	assert(cd->sub == NOSUB);
270 	assert(cd->nschrs == 0);
271 	assert(cd->nuchrs == 0);
272 	cd->flags = FREECOL;
273 
274 	if ((size_t) co == cm->max)
275 	{
276 		while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max]))
277 			cm->max--;
278 		assert(cm->free >= 0);
279 		while ((size_t) cm->free > cm->max)
280 			cm->free = cm->cd[cm->free].sub;
281 		if (cm->free > 0)
282 		{
283 			assert(cm->free < cm->max);
284 			pco = cm->free;
285 			nco = cm->cd[pco].sub;
286 			while (nco > 0)
287 				if ((size_t) nco > cm->max)
288 				{
289 					/* take this one out of freelist */
290 					nco = cm->cd[nco].sub;
291 					cm->cd[pco].sub = nco;
292 				}
293 				else
294 				{
295 					assert(nco < cm->max);
296 					pco = nco;
297 					nco = cm->cd[pco].sub;
298 				}
299 		}
300 	}
301 	else
302 	{
303 		cd->sub = cm->free;
304 		cm->free = (color) (cd - cm->cd);
305 	}
306 }
307 
308 /*
309  * pseudocolor - allocate a false color, to be managed by other means
310  */
311 static color
pseudocolor(struct colormap * cm)312 pseudocolor(struct colormap *cm)
313 {
314 	color		co;
315 	struct colordesc *cd;
316 
317 	co = newcolor(cm);
318 	if (CISERR())
319 		return COLORLESS;
320 	cd = &cm->cd[co];
321 	cd->nschrs = 0;
322 	cd->nuchrs = 1;				/* pretend it is in the upper map */
323 	cd->sub = NOSUB;
324 	cd->arcs = NULL;
325 	cd->firstchr = CHR_MIN;
326 	cd->flags = PSEUDO;
327 	return co;
328 }
329 
330 /*
331  * subcolor - allocate a new subcolor (if necessary) to this chr
332  *
333  * This works only for chrs that map into the low color map.
334  */
335 static color
subcolor(struct colormap * cm,chr c)336 subcolor(struct colormap *cm, chr c)
337 {
338 	color		co;				/* current color of c */
339 	color		sco;			/* new subcolor */
340 
341 	assert(c <= MAX_SIMPLE_CHR);
342 
343 	co = cm->locolormap[c - CHR_MIN];
344 	sco = newsub(cm, co);
345 	if (CISERR())
346 		return COLORLESS;
347 	assert(sco != COLORLESS);
348 
349 	if (co == sco)				/* already in an open subcolor */
350 		return co;				/* rest is redundant */
351 	cm->cd[co].nschrs--;
352 	if (cm->cd[sco].nschrs == 0)
353 		cm->cd[sco].firstchr = c;
354 	cm->cd[sco].nschrs++;
355 	cm->locolormap[c - CHR_MIN] = sco;
356 	return sco;
357 }
358 
359 /*
360  * subcolorhi - allocate a new subcolor (if necessary) to this colormap entry
361  *
362  * This is the same processing as subcolor(), but for entries in the high
363  * colormap, which do not necessarily correspond to exactly one chr code.
364  */
365 static color
subcolorhi(struct colormap * cm,color * pco)366 subcolorhi(struct colormap *cm, color *pco)
367 {
368 	color		co;				/* current color of entry */
369 	color		sco;			/* new subcolor */
370 
371 	co = *pco;
372 	sco = newsub(cm, co);
373 	if (CISERR())
374 		return COLORLESS;
375 	assert(sco != COLORLESS);
376 
377 	if (co == sco)				/* already in an open subcolor */
378 		return co;				/* rest is redundant */
379 	cm->cd[co].nuchrs--;
380 	cm->cd[sco].nuchrs++;
381 	*pco = sco;
382 	return sco;
383 }
384 
385 /*
386  * newsub - allocate a new subcolor (if necessary) for a color
387  */
388 static color
newsub(struct colormap * cm,color co)389 newsub(struct colormap *cm,
390 	   color co)
391 {
392 	color		sco;			/* new subcolor */
393 
394 	sco = cm->cd[co].sub;
395 	if (sco == NOSUB)
396 	{							/* color has no open subcolor */
397 		/* optimization: singly-referenced color need not be subcolored */
398 		if ((cm->cd[co].nschrs + cm->cd[co].nuchrs) == 1)
399 			return co;
400 		sco = newcolor(cm);		/* must create subcolor */
401 		if (sco == COLORLESS)
402 		{
403 			assert(CISERR());
404 			return COLORLESS;
405 		}
406 		cm->cd[co].sub = sco;
407 		cm->cd[sco].sub = sco;	/* open subcolor points to self */
408 	}
409 	assert(sco != NOSUB);
410 
411 	return sco;
412 }
413 
414 /*
415  * newhicolorrow - get a new row in the hicolormap, cloning it from oldrow
416  *
417  * Returns array index of new row.  Note the array might move.
418  */
419 static int
newhicolorrow(struct colormap * cm,int oldrow)420 newhicolorrow(struct colormap *cm,
421 			  int oldrow)
422 {
423 	int			newrow = cm->hiarrayrows;
424 	color	   *newrowptr;
425 	int			i;
426 
427 	/* Assign a fresh array row index, enlarging storage if needed */
428 	if (newrow >= cm->maxarrayrows)
429 	{
430 		color	   *newarray;
431 
432 		if (cm->maxarrayrows >= INT_MAX / (cm->hiarraycols * 2))
433 		{
434 			CERR(REG_ESPACE);
435 			return 0;
436 		}
437 		newarray = (color *) REALLOC(cm->hicolormap,
438 									 cm->maxarrayrows * 2 *
439 									 cm->hiarraycols * sizeof(color));
440 		if (newarray == NULL)
441 		{
442 			CERR(REG_ESPACE);
443 			return 0;
444 		}
445 		cm->hicolormap = newarray;
446 		cm->maxarrayrows *= 2;
447 	}
448 	cm->hiarrayrows++;
449 
450 	/* Copy old row data */
451 	newrowptr = &cm->hicolormap[newrow * cm->hiarraycols];
452 	memcpy(newrowptr,
453 		   &cm->hicolormap[oldrow * cm->hiarraycols],
454 		   cm->hiarraycols * sizeof(color));
455 
456 	/* Increase color reference counts to reflect new colormap entries */
457 	for (i = 0; i < cm->hiarraycols; i++)
458 		cm->cd[newrowptr[i]].nuchrs++;
459 
460 	return newrow;
461 }
462 
463 /*
464  * newhicolorcols - create a new set of columns in the high colormap
465  *
466  * Essentially, extends the 2-D array to the right with a copy of itself.
467  */
468 static void
newhicolorcols(struct colormap * cm)469 newhicolorcols(struct colormap *cm)
470 {
471 	color	   *newarray;
472 	int			r,
473 				c;
474 
475 	if (cm->hiarraycols >= INT_MAX / (cm->maxarrayrows * 2))
476 	{
477 		CERR(REG_ESPACE);
478 		return;
479 	}
480 	newarray = (color *) REALLOC(cm->hicolormap,
481 								 cm->maxarrayrows *
482 								 cm->hiarraycols * 2 * sizeof(color));
483 	if (newarray == NULL)
484 	{
485 		CERR(REG_ESPACE);
486 		return;
487 	}
488 	cm->hicolormap = newarray;
489 
490 	/* Duplicate existing columns to the right, and increase ref counts */
491 	/* Must work backwards in the array because we realloc'd in place */
492 	for (r = cm->hiarrayrows - 1; r >= 0; r--)
493 	{
494 		color	   *oldrowptr = &newarray[r * cm->hiarraycols];
495 		color	   *newrowptr = &newarray[r * cm->hiarraycols * 2];
496 		color	   *newrowptr2 = newrowptr + cm->hiarraycols;
497 
498 		for (c = 0; c < cm->hiarraycols; c++)
499 		{
500 			color		co = oldrowptr[c];
501 
502 			newrowptr[c] = newrowptr2[c] = co;
503 			cm->cd[co].nuchrs++;
504 		}
505 	}
506 
507 	cm->hiarraycols *= 2;
508 }
509 
510 /*
511  * subcolorcvec - allocate new subcolors to cvec members, fill in arcs
512  *
513  * For each chr "c" represented by the cvec, do the equivalent of
514  * newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
515  *
516  * Note that in typical cases, many of the subcolors are the same.
517  * While newarc() would discard duplicate arc requests, we can save
518  * some cycles by not calling it repetitively to begin with.  This is
519  * mechanized with the "lastsubcolor" state variable.
520  */
521 static void
subcolorcvec(struct vars * v,struct cvec * cv,struct state * lp,struct state * rp)522 subcolorcvec(struct vars *v,
523 			 struct cvec *cv,
524 			 struct state *lp,
525 			 struct state *rp)
526 {
527 	struct colormap *cm = v->cm;
528 	color		lastsubcolor = COLORLESS;
529 	chr			ch,
530 				from,
531 				to;
532 	const chr  *p;
533 	int			i;
534 
535 	/* ordinary characters */
536 	for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
537 	{
538 		ch = *p;
539 		subcoloronechr(v, ch, lp, rp, &lastsubcolor);
540 		NOERR();
541 	}
542 
543 	/* and the ranges */
544 	for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
545 	{
546 		from = *p;
547 		to = *(p + 1);
548 		if (from <= MAX_SIMPLE_CHR)
549 		{
550 			/* deal with simple chars one at a time */
551 			chr			lim = (to <= MAX_SIMPLE_CHR) ? to : MAX_SIMPLE_CHR;
552 
553 			while (from <= lim)
554 			{
555 				color		sco = subcolor(cm, from);
556 
557 				NOERR();
558 				if (sco != lastsubcolor)
559 				{
560 					newarc(v->nfa, PLAIN, sco, lp, rp);
561 					NOERR();
562 					lastsubcolor = sco;
563 				}
564 				from++;
565 			}
566 		}
567 		/* deal with any part of the range that's above MAX_SIMPLE_CHR */
568 		if (from < to)
569 			subcoloronerange(v, from, to, lp, rp, &lastsubcolor);
570 		else if (from == to)
571 			subcoloronechr(v, from, lp, rp, &lastsubcolor);
572 		NOERR();
573 	}
574 
575 	/* and deal with cclass if any */
576 	if (cv->cclasscode >= 0)
577 	{
578 		int			classbit;
579 		color	   *pco;
580 		int			r,
581 					c;
582 
583 		/* Enlarge array if we don't have a column bit assignment for cclass */
584 		if (cm->classbits[cv->cclasscode] == 0)
585 		{
586 			cm->classbits[cv->cclasscode] = cm->hiarraycols;
587 			newhicolorcols(cm);
588 			NOERR();
589 		}
590 		/* Apply subcolorhi() and make arc for each entry in relevant cols */
591 		classbit = cm->classbits[cv->cclasscode];
592 		pco = cm->hicolormap;
593 		for (r = 0; r < cm->hiarrayrows; r++)
594 		{
595 			for (c = 0; c < cm->hiarraycols; c++)
596 			{
597 				if (c & classbit)
598 				{
599 					color		sco = subcolorhi(cm, pco);
600 
601 					NOERR();
602 					/* add the arc if needed */
603 					if (sco != lastsubcolor)
604 					{
605 						newarc(v->nfa, PLAIN, sco, lp, rp);
606 						NOERR();
607 						lastsubcolor = sco;
608 					}
609 				}
610 				pco++;
611 			}
612 		}
613 	}
614 }
615 
616 /*
617  * subcoloronechr - do subcolorcvec's work for a singleton chr
618  *
619  * We could just let subcoloronerange do this, but it's a bit more efficient
620  * if we exploit the single-chr case.  Also, callers find it useful for this
621  * to be able to handle both low and high chr codes.
622  */
623 static void
subcoloronechr(struct vars * v,chr ch,struct state * lp,struct state * rp,color * lastsubcolor)624 subcoloronechr(struct vars *v,
625 			   chr ch,
626 			   struct state *lp,
627 			   struct state *rp,
628 			   color *lastsubcolor)
629 {
630 	struct colormap *cm = v->cm;
631 	colormaprange *newranges;
632 	int			numnewranges;
633 	colormaprange *oldrange;
634 	int			oldrangen;
635 	int			newrow;
636 
637 	/* Easy case for low chr codes */
638 	if (ch <= MAX_SIMPLE_CHR)
639 	{
640 		color		sco = subcolor(cm, ch);
641 
642 		NOERR();
643 		if (sco != *lastsubcolor)
644 		{
645 			newarc(v->nfa, PLAIN, sco, lp, rp);
646 			*lastsubcolor = sco;
647 		}
648 		return;
649 	}
650 
651 	/*
652 	 * Potentially, we could need two more colormapranges than we have now, if
653 	 * the given chr is in the middle of some existing range.
654 	 */
655 	newranges = (colormaprange *)
656 		MALLOC((cm->numcmranges + 2) * sizeof(colormaprange));
657 	if (newranges == NULL)
658 	{
659 		CERR(REG_ESPACE);
660 		return;
661 	}
662 	numnewranges = 0;
663 
664 	/* Ranges before target are unchanged */
665 	for (oldrange = cm->cmranges, oldrangen = 0;
666 		 oldrangen < cm->numcmranges;
667 		 oldrange++, oldrangen++)
668 	{
669 		if (oldrange->cmax >= ch)
670 			break;
671 		newranges[numnewranges++] = *oldrange;
672 	}
673 
674 	/* Match target chr against current range */
675 	if (oldrangen >= cm->numcmranges || oldrange->cmin > ch)
676 	{
677 		/* chr does not belong to any existing range, make a new one */
678 		newranges[numnewranges].cmin = ch;
679 		newranges[numnewranges].cmax = ch;
680 		/* row state should be cloned from the "all others" row */
681 		newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
682 		numnewranges++;
683 	}
684 	else if (oldrange->cmin == oldrange->cmax)
685 	{
686 		/* we have an existing singleton range matching the chr */
687 		newranges[numnewranges++] = *oldrange;
688 		newrow = oldrange->rownum;
689 		/* we've now fully processed this old range */
690 		oldrange++, oldrangen++;
691 	}
692 	else
693 	{
694 		/* chr is a subset of this existing range, must split it */
695 		if (ch > oldrange->cmin)
696 		{
697 			/* emit portion of old range before chr */
698 			newranges[numnewranges].cmin = oldrange->cmin;
699 			newranges[numnewranges].cmax = ch - 1;
700 			newranges[numnewranges].rownum = oldrange->rownum;
701 			numnewranges++;
702 		}
703 		/* emit chr as singleton range, initially cloning from range */
704 		newranges[numnewranges].cmin = ch;
705 		newranges[numnewranges].cmax = ch;
706 		newranges[numnewranges].rownum = newrow =
707 			newhicolorrow(cm, oldrange->rownum);
708 		numnewranges++;
709 		if (ch < oldrange->cmax)
710 		{
711 			/* emit portion of old range after chr */
712 			newranges[numnewranges].cmin = ch + 1;
713 			newranges[numnewranges].cmax = oldrange->cmax;
714 			/* must clone the row if we are making two new ranges from old */
715 			newranges[numnewranges].rownum =
716 				(ch > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) :
717 				oldrange->rownum;
718 			numnewranges++;
719 		}
720 		/* we've now fully processed this old range */
721 		oldrange++, oldrangen++;
722 	}
723 
724 	/* Update colors in newrow and create arcs as needed */
725 	subcoloronerow(v, newrow, lp, rp, lastsubcolor);
726 
727 	/* Ranges after target are unchanged */
728 	for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++)
729 	{
730 		newranges[numnewranges++] = *oldrange;
731 	}
732 
733 	/* Assert our original space estimate was adequate */
734 	assert(numnewranges <= (cm->numcmranges + 2));
735 
736 	/* And finally, store back the updated list of ranges */
737 	if (cm->cmranges != NULL)
738 		FREE(cm->cmranges);
739 	cm->cmranges = newranges;
740 	cm->numcmranges = numnewranges;
741 }
742 
743 /*
744  * subcoloronerange - do subcolorcvec's work for a high range
745  */
746 static void
subcoloronerange(struct vars * v,chr from,chr to,struct state * lp,struct state * rp,color * lastsubcolor)747 subcoloronerange(struct vars *v,
748 				 chr from,
749 				 chr to,
750 				 struct state *lp,
751 				 struct state *rp,
752 				 color *lastsubcolor)
753 {
754 	struct colormap *cm = v->cm;
755 	colormaprange *newranges;
756 	int			numnewranges;
757 	colormaprange *oldrange;
758 	int			oldrangen;
759 	int			newrow;
760 
761 	/* Caller should take care of non-high-range cases */
762 	assert(from > MAX_SIMPLE_CHR);
763 	assert(from < to);
764 
765 	/*
766 	 * Potentially, if we have N non-adjacent ranges, we could need as many as
767 	 * 2N+1 result ranges (consider case where new range spans 'em all).
768 	 */
769 	newranges = (colormaprange *)
770 		MALLOC((cm->numcmranges * 2 + 1) * sizeof(colormaprange));
771 	if (newranges == NULL)
772 	{
773 		CERR(REG_ESPACE);
774 		return;
775 	}
776 	numnewranges = 0;
777 
778 	/* Ranges before target are unchanged */
779 	for (oldrange = cm->cmranges, oldrangen = 0;
780 		 oldrangen < cm->numcmranges;
781 		 oldrange++, oldrangen++)
782 	{
783 		if (oldrange->cmax >= from)
784 			break;
785 		newranges[numnewranges++] = *oldrange;
786 	}
787 
788 	/*
789 	 * Deal with ranges that (partially) overlap the target.  As we process
790 	 * each such range, increase "from" to remove the dealt-with characters
791 	 * from the target range.
792 	 */
793 	while (oldrangen < cm->numcmranges && oldrange->cmin <= to)
794 	{
795 		if (from < oldrange->cmin)
796 		{
797 			/* Handle portion of new range that corresponds to no old range */
798 			newranges[numnewranges].cmin = from;
799 			newranges[numnewranges].cmax = oldrange->cmin - 1;
800 			/* row state should be cloned from the "all others" row */
801 			newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
802 			numnewranges++;
803 			/* Update colors in newrow and create arcs as needed */
804 			subcoloronerow(v, newrow, lp, rp, lastsubcolor);
805 			/* We've now fully processed the part of new range before old */
806 			from = oldrange->cmin;
807 		}
808 
809 		if (from <= oldrange->cmin && to >= oldrange->cmax)
810 		{
811 			/* old range is fully contained in new, process it in-place */
812 			newranges[numnewranges++] = *oldrange;
813 			newrow = oldrange->rownum;
814 			from = oldrange->cmax + 1;
815 		}
816 		else
817 		{
818 			/* some part of old range does not overlap new range */
819 			if (from > oldrange->cmin)
820 			{
821 				/* emit portion of old range before new range */
822 				newranges[numnewranges].cmin = oldrange->cmin;
823 				newranges[numnewranges].cmax = from - 1;
824 				newranges[numnewranges].rownum = oldrange->rownum;
825 				numnewranges++;
826 			}
827 			/* emit common subrange, initially cloning from old range */
828 			newranges[numnewranges].cmin = from;
829 			newranges[numnewranges].cmax =
830 				(to < oldrange->cmax) ? to : oldrange->cmax;
831 			newranges[numnewranges].rownum = newrow =
832 				newhicolorrow(cm, oldrange->rownum);
833 			numnewranges++;
834 			if (to < oldrange->cmax)
835 			{
836 				/* emit portion of old range after new range */
837 				newranges[numnewranges].cmin = to + 1;
838 				newranges[numnewranges].cmax = oldrange->cmax;
839 				/* must clone the row if we are making two new ranges from old */
840 				newranges[numnewranges].rownum =
841 					(from > oldrange->cmin) ? newhicolorrow(cm, oldrange->rownum) :
842 					oldrange->rownum;
843 				numnewranges++;
844 			}
845 			from = oldrange->cmax + 1;
846 		}
847 		/* Update colors in newrow and create arcs as needed */
848 		subcoloronerow(v, newrow, lp, rp, lastsubcolor);
849 		/* we've now fully processed this old range */
850 		oldrange++, oldrangen++;
851 	}
852 
853 	if (from <= to)
854 	{
855 		/* Handle portion of new range that corresponds to no old range */
856 		newranges[numnewranges].cmin = from;
857 		newranges[numnewranges].cmax = to;
858 		/* row state should be cloned from the "all others" row */
859 		newranges[numnewranges].rownum = newrow = newhicolorrow(cm, 0);
860 		numnewranges++;
861 		/* Update colors in newrow and create arcs as needed */
862 		subcoloronerow(v, newrow, lp, rp, lastsubcolor);
863 	}
864 
865 	/* Ranges after target are unchanged */
866 	for (; oldrangen < cm->numcmranges; oldrange++, oldrangen++)
867 	{
868 		newranges[numnewranges++] = *oldrange;
869 	}
870 
871 	/* Assert our original space estimate was adequate */
872 	assert(numnewranges <= (cm->numcmranges * 2 + 1));
873 
874 	/* And finally, store back the updated list of ranges */
875 	if (cm->cmranges != NULL)
876 		FREE(cm->cmranges);
877 	cm->cmranges = newranges;
878 	cm->numcmranges = numnewranges;
879 }
880 
881 /*
882  * subcoloronerow - do subcolorcvec's work for one new row in the high colormap
883  */
884 static void
subcoloronerow(struct vars * v,int rownum,struct state * lp,struct state * rp,color * lastsubcolor)885 subcoloronerow(struct vars *v,
886 			   int rownum,
887 			   struct state *lp,
888 			   struct state *rp,
889 			   color *lastsubcolor)
890 {
891 	struct colormap *cm = v->cm;
892 	color	   *pco;
893 	int			i;
894 
895 	/* Apply subcolorhi() and make arc for each entry in row */
896 	pco = &cm->hicolormap[rownum * cm->hiarraycols];
897 	for (i = 0; i < cm->hiarraycols; pco++, i++)
898 	{
899 		color		sco = subcolorhi(cm, pco);
900 
901 		NOERR();
902 		/* make the arc if needed */
903 		if (sco != *lastsubcolor)
904 		{
905 			newarc(v->nfa, PLAIN, sco, lp, rp);
906 			NOERR();
907 			*lastsubcolor = sco;
908 		}
909 	}
910 }
911 
912 /*
913  * okcolors - promote subcolors to full colors
914  */
915 static void
okcolors(struct nfa * nfa,struct colormap * cm)916 okcolors(struct nfa *nfa,
917 		 struct colormap *cm)
918 {
919 	struct colordesc *cd;
920 	struct colordesc *end = CDEND(cm);
921 	struct colordesc *scd;
922 	struct arc *a;
923 	color		co;
924 	color		sco;
925 
926 	for (cd = cm->cd, co = 0; cd < end; cd++, co++)
927 	{
928 		sco = cd->sub;
929 		if (UNUSEDCOLOR(cd) || sco == NOSUB)
930 		{
931 			/* has no subcolor, no further action */
932 		}
933 		else if (sco == co)
934 		{
935 			/* is subcolor, let parent deal with it */
936 		}
937 		else if (cd->nschrs == 0 && cd->nuchrs == 0)
938 		{
939 			/* parent empty, its arcs change color to subcolor */
940 			cd->sub = NOSUB;
941 			scd = &cm->cd[sco];
942 			assert(scd->nschrs > 0 || scd->nuchrs > 0);
943 			assert(scd->sub == sco);
944 			scd->sub = NOSUB;
945 			while ((a = cd->arcs) != NULL)
946 			{
947 				assert(a->co == co);
948 				uncolorchain(cm, a);
949 				a->co = sco;
950 				colorchain(cm, a);
951 			}
952 			freecolor(cm, co);
953 		}
954 		else
955 		{
956 			/* parent's arcs must gain parallel subcolor arcs */
957 			cd->sub = NOSUB;
958 			scd = &cm->cd[sco];
959 			assert(scd->nschrs > 0 || scd->nuchrs > 0);
960 			assert(scd->sub == sco);
961 			scd->sub = NOSUB;
962 			for (a = cd->arcs; a != NULL; a = a->colorchain)
963 			{
964 				assert(a->co == co);
965 				newarc(nfa, a->type, sco, a->from, a->to);
966 			}
967 		}
968 	}
969 }
970 
971 /*
972  * colorchain - add this arc to the color chain of its color
973  */
974 static void
colorchain(struct colormap * cm,struct arc * a)975 colorchain(struct colormap *cm,
976 		   struct arc *a)
977 {
978 	struct colordesc *cd = &cm->cd[a->co];
979 
980 	if (cd->arcs != NULL)
981 		cd->arcs->colorchainRev = a;
982 	a->colorchain = cd->arcs;
983 	a->colorchainRev = NULL;
984 	cd->arcs = a;
985 }
986 
987 /*
988  * uncolorchain - delete this arc from the color chain of its color
989  */
990 static void
uncolorchain(struct colormap * cm,struct arc * a)991 uncolorchain(struct colormap *cm,
992 			 struct arc *a)
993 {
994 	struct colordesc *cd = &cm->cd[a->co];
995 	struct arc *aa = a->colorchainRev;
996 
997 	if (aa == NULL)
998 	{
999 		assert(cd->arcs == a);
1000 		cd->arcs = a->colorchain;
1001 	}
1002 	else
1003 	{
1004 		assert(aa->colorchain == a);
1005 		aa->colorchain = a->colorchain;
1006 	}
1007 	if (a->colorchain != NULL)
1008 		a->colorchain->colorchainRev = aa;
1009 	a->colorchain = NULL;		/* paranoia */
1010 	a->colorchainRev = NULL;
1011 }
1012 
1013 /*
1014  * rainbow - add arcs of all full colors (but one) between specified states
1015  */
1016 static void
rainbow(struct nfa * nfa,struct colormap * cm,int type,color but,struct state * from,struct state * to)1017 rainbow(struct nfa *nfa,
1018 		struct colormap *cm,
1019 		int type,
1020 		color but,				/* COLORLESS if no exceptions */
1021 		struct state *from,
1022 		struct state *to)
1023 {
1024 	struct colordesc *cd;
1025 	struct colordesc *end = CDEND(cm);
1026 	color		co;
1027 
1028 	for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
1029 		if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but &&
1030 			!(cd->flags & PSEUDO))
1031 			newarc(nfa, type, co, from, to);
1032 }
1033 
1034 /*
1035  * colorcomplement - add arcs of complementary colors
1036  *
1037  * The calling sequence ought to be reconciled with cloneouts().
1038  */
1039 static void
colorcomplement(struct nfa * nfa,struct colormap * cm,int type,struct state * of,struct state * from,struct state * to)1040 colorcomplement(struct nfa *nfa,
1041 				struct colormap *cm,
1042 				int type,
1043 				struct state *of,	/* complements of this guy's PLAIN outarcs */
1044 				struct state *from,
1045 				struct state *to)
1046 {
1047 	struct colordesc *cd;
1048 	struct colordesc *end = CDEND(cm);
1049 	color		co;
1050 
1051 	assert(of != from);
1052 	for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++)
1053 		if (!UNUSEDCOLOR(cd) && !(cd->flags & PSEUDO))
1054 			if (findarc(of, PLAIN, co) == NULL)
1055 				newarc(nfa, type, co, from, to);
1056 }
1057 
1058 
1059 #ifdef REG_DEBUG
1060 
1061 /*
1062  * dumpcolors - debugging output
1063  */
1064 static void
dumpcolors(struct colormap * cm,FILE * f)1065 dumpcolors(struct colormap *cm,
1066 		   FILE *f)
1067 {
1068 	struct colordesc *cd;
1069 	struct colordesc *end;
1070 	color		co;
1071 	chr			c;
1072 
1073 	fprintf(f, "max %ld\n", (long) cm->max);
1074 	end = CDEND(cm);
1075 	for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */
1076 	{
1077 		if (!UNUSEDCOLOR(cd))
1078 		{
1079 			assert(cd->nschrs > 0 || cd->nuchrs > 0);
1080 			if (cd->flags & PSEUDO)
1081 				fprintf(f, "#%2ld(ps): ", (long) co);
1082 			else
1083 				fprintf(f, "#%2ld(%2d): ", (long) co, cd->nschrs + cd->nuchrs);
1084 
1085 			/*
1086 			 * Unfortunately, it's hard to do this next bit more efficiently.
1087 			 */
1088 			for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
1089 				if (GETCOLOR(cm, c) == co)
1090 					dumpchr(c, f);
1091 			fprintf(f, "\n");
1092 		}
1093 	}
1094 	/* dump the high colormap if it contains anything interesting */
1095 	if (cm->hiarrayrows > 1 || cm->hiarraycols > 1)
1096 	{
1097 		int			r,
1098 					c;
1099 		const color *rowptr;
1100 
1101 		fprintf(f, "other:\t");
1102 		for (c = 0; c < cm->hiarraycols; c++)
1103 		{
1104 			fprintf(f, "\t%ld", (long) cm->hicolormap[c]);
1105 		}
1106 		fprintf(f, "\n");
1107 		for (r = 0; r < cm->numcmranges; r++)
1108 		{
1109 			dumpchr(cm->cmranges[r].cmin, f);
1110 			fprintf(f, "..");
1111 			dumpchr(cm->cmranges[r].cmax, f);
1112 			fprintf(f, ":");
1113 			rowptr = &cm->hicolormap[cm->cmranges[r].rownum * cm->hiarraycols];
1114 			for (c = 0; c < cm->hiarraycols; c++)
1115 			{
1116 				fprintf(f, "\t%ld", (long) rowptr[c]);
1117 			}
1118 			fprintf(f, "\n");
1119 		}
1120 	}
1121 }
1122 
1123 /*
1124  * dumpchr - print a chr
1125  *
1126  * Kind of char-centric but works well enough for debug use.
1127  */
1128 static void
dumpchr(chr c,FILE * f)1129 dumpchr(chr c,
1130 		FILE *f)
1131 {
1132 	if (c == '\\')
1133 		fprintf(f, "\\\\");
1134 	else if (c > ' ' && c <= '~')
1135 		putc((char) c, f);
1136 	else
1137 		fprintf(f, "\\u%04lx", (long) c);
1138 }
1139 
1140 #endif							/* REG_DEBUG */
1141