xref: /freebsd/contrib/flex/src/ecs.c (revision 7e382390)
1*7e382390SJung-uk Kim /* ecs - equivalence class routines */
2*7e382390SJung-uk Kim 
3*7e382390SJung-uk Kim /*  Copyright (c) 1990 The Regents of the University of California. */
4*7e382390SJung-uk Kim /*  All rights reserved. */
5*7e382390SJung-uk Kim 
6*7e382390SJung-uk Kim /*  This code is derived from software contributed to Berkeley by */
7*7e382390SJung-uk Kim /*  Vern Paxson. */
8*7e382390SJung-uk Kim 
9*7e382390SJung-uk Kim /*  The United States Government has rights in this work pursuant */
10*7e382390SJung-uk Kim /*  to contract no. DE-AC03-76SF00098 between the United States */
11*7e382390SJung-uk Kim /*  Department of Energy and the University of California. */
12*7e382390SJung-uk Kim 
13*7e382390SJung-uk Kim /* This file is part of flex */
14*7e382390SJung-uk Kim 
15*7e382390SJung-uk Kim /*  Redistribution and use in source and binary forms, with or without */
16*7e382390SJung-uk Kim /*  modification, are permitted provided that the following conditions */
17*7e382390SJung-uk Kim /*  are met: */
18*7e382390SJung-uk Kim 
19*7e382390SJung-uk Kim /*  1. Redistributions of source code must retain the above copyright */
20*7e382390SJung-uk Kim /*     notice, this list of conditions and the following disclaimer. */
21*7e382390SJung-uk Kim /*  2. Redistributions in binary form must reproduce the above copyright */
22*7e382390SJung-uk Kim /*     notice, this list of conditions and the following disclaimer in the */
23*7e382390SJung-uk Kim /*     documentation and/or other materials provided with the distribution. */
24*7e382390SJung-uk Kim 
25*7e382390SJung-uk Kim /*  Neither the name of the University nor the names of its contributors */
26*7e382390SJung-uk Kim /*  may be used to endorse or promote products derived from this software */
27*7e382390SJung-uk Kim /*  without specific prior written permission. */
28*7e382390SJung-uk Kim 
29*7e382390SJung-uk Kim /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30*7e382390SJung-uk Kim /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31*7e382390SJung-uk Kim /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32*7e382390SJung-uk Kim /*  PURPOSE. */
33*7e382390SJung-uk Kim 
34*7e382390SJung-uk Kim 
35*7e382390SJung-uk Kim #include "flexdef.h"
36*7e382390SJung-uk Kim 
37*7e382390SJung-uk Kim /* ccl2ecl - convert character classes to set of equivalence classes */
38*7e382390SJung-uk Kim 
ccl2ecl(void)39*7e382390SJung-uk Kim void    ccl2ecl (void)
40*7e382390SJung-uk Kim {
41*7e382390SJung-uk Kim 	int     i, ich, newlen, cclp, ccls, cclmec;
42*7e382390SJung-uk Kim 
43*7e382390SJung-uk Kim 	for (i = 1; i <= lastccl; ++i) {
44*7e382390SJung-uk Kim 		/* We loop through each character class, and for each character
45*7e382390SJung-uk Kim 		 * in the class, add the character's equivalence class to the
46*7e382390SJung-uk Kim 		 * new "character" class we are creating.  Thus when we are all
47*7e382390SJung-uk Kim 		 * done, character classes will really consist of collections
48*7e382390SJung-uk Kim 		 * of equivalence classes
49*7e382390SJung-uk Kim 		 */
50*7e382390SJung-uk Kim 
51*7e382390SJung-uk Kim 		newlen = 0;
52*7e382390SJung-uk Kim 		cclp = cclmap[i];
53*7e382390SJung-uk Kim 
54*7e382390SJung-uk Kim 		for (ccls = 0; ccls < ccllen[i]; ++ccls) {
55*7e382390SJung-uk Kim 			ich = ccltbl[cclp + ccls];
56*7e382390SJung-uk Kim 			cclmec = ecgroup[ich];
57*7e382390SJung-uk Kim 
58*7e382390SJung-uk Kim 			if (cclmec > 0) {
59*7e382390SJung-uk Kim 				/* Note: range 1..256 is mapped to 1..255,0 */
60*7e382390SJung-uk Kim 				ccltbl[cclp + newlen] = (unsigned char) cclmec;
61*7e382390SJung-uk Kim 				++newlen;
62*7e382390SJung-uk Kim 			}
63*7e382390SJung-uk Kim 		}
64*7e382390SJung-uk Kim 
65*7e382390SJung-uk Kim 		ccllen[i] = newlen;
66*7e382390SJung-uk Kim 	}
67*7e382390SJung-uk Kim }
68*7e382390SJung-uk Kim 
69*7e382390SJung-uk Kim 
70*7e382390SJung-uk Kim /* cre8ecs - associate equivalence class numbers with class members
71*7e382390SJung-uk Kim  *
72*7e382390SJung-uk Kim  * fwd is the forward linked-list of equivalence class members.  bck
73*7e382390SJung-uk Kim  * is the backward linked-list, and num is the number of class members.
74*7e382390SJung-uk Kim  *
75*7e382390SJung-uk Kim  * Returned is the number of classes.
76*7e382390SJung-uk Kim  */
77*7e382390SJung-uk Kim 
cre8ecs(int fwd[],int bck[],int num)78*7e382390SJung-uk Kim int     cre8ecs (int fwd[], int bck[], int num)
79*7e382390SJung-uk Kim {
80*7e382390SJung-uk Kim 	int     i, j, numcl;
81*7e382390SJung-uk Kim 
82*7e382390SJung-uk Kim 	numcl = 0;
83*7e382390SJung-uk Kim 
84*7e382390SJung-uk Kim 	/* Create equivalence class numbers.  From now on, ABS( bck(x) )
85*7e382390SJung-uk Kim 	 * is the equivalence class number for object x.  If bck(x)
86*7e382390SJung-uk Kim 	 * is positive, then x is the representative of its equivalence
87*7e382390SJung-uk Kim 	 * class.
88*7e382390SJung-uk Kim 	 */
89*7e382390SJung-uk Kim 	for (i = 1; i <= num; ++i)
90*7e382390SJung-uk Kim 		if (bck[i] == NIL) {
91*7e382390SJung-uk Kim 			bck[i] = ++numcl;
92*7e382390SJung-uk Kim 			for (j = fwd[i]; j != NIL; j = fwd[j])
93*7e382390SJung-uk Kim 				bck[j] = -numcl;
94*7e382390SJung-uk Kim 		}
95*7e382390SJung-uk Kim 
96*7e382390SJung-uk Kim 	return numcl;
97*7e382390SJung-uk Kim }
98*7e382390SJung-uk Kim 
99*7e382390SJung-uk Kim 
100*7e382390SJung-uk Kim /* mkeccl - update equivalence classes based on character class xtions
101*7e382390SJung-uk Kim  *
102*7e382390SJung-uk Kim  * synopsis
103*7e382390SJung-uk Kim  *    unsigned char ccls[];
104*7e382390SJung-uk Kim  *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
105*7e382390SJung-uk Kim  *    void mkeccl( unsigned char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz],
106*7e382390SJung-uk Kim  *			int llsiz, int NUL_mapping );
107*7e382390SJung-uk Kim  *
108*7e382390SJung-uk Kim  * ccls contains the elements of the character class, lenccl is the
109*7e382390SJung-uk Kim  * number of elements in the ccl, fwd is the forward link-list of equivalent
110*7e382390SJung-uk Kim  * characters, bck is the backward link-list, and llsiz size of the link-list.
111*7e382390SJung-uk Kim  *
112*7e382390SJung-uk Kim  * NUL_mapping is the value which NUL (0) should be mapped to.
113*7e382390SJung-uk Kim  */
114*7e382390SJung-uk Kim 
mkeccl(unsigned char ccls[],int lenccl,int fwd[],int bck[],int llsiz,int NUL_mapping)115*7e382390SJung-uk Kim void    mkeccl (unsigned char ccls[], int lenccl, int fwd[], int bck[], int llsiz, int NUL_mapping)
116*7e382390SJung-uk Kim {
117*7e382390SJung-uk Kim 	int     cclp, oldec, newec;
118*7e382390SJung-uk Kim 	int     cclm, i, j;
119*7e382390SJung-uk Kim 	static unsigned char cclflags[CSIZE];	/* initialized to all '\0' */
120*7e382390SJung-uk Kim 
121*7e382390SJung-uk Kim 	/* Note that it doesn't matter whether or not the character class is
122*7e382390SJung-uk Kim 	 * negated.  The same results will be obtained in either case.
123*7e382390SJung-uk Kim 	 */
124*7e382390SJung-uk Kim 
125*7e382390SJung-uk Kim 	cclp = 0;
126*7e382390SJung-uk Kim 
127*7e382390SJung-uk Kim 	while (cclp < lenccl) {
128*7e382390SJung-uk Kim 		cclm = ccls[cclp];
129*7e382390SJung-uk Kim 
130*7e382390SJung-uk Kim 		if (NUL_mapping && cclm == 0)
131*7e382390SJung-uk Kim 			cclm = NUL_mapping;
132*7e382390SJung-uk Kim 
133*7e382390SJung-uk Kim 		oldec = bck[cclm];
134*7e382390SJung-uk Kim 		newec = cclm;
135*7e382390SJung-uk Kim 
136*7e382390SJung-uk Kim 		j = cclp + 1;
137*7e382390SJung-uk Kim 
138*7e382390SJung-uk Kim 		for (i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i]) {	/* look for the symbol in the character class */
139*7e382390SJung-uk Kim 			for (; j < lenccl; ++j) {
140*7e382390SJung-uk Kim 				int ccl_char;
141*7e382390SJung-uk Kim 
142*7e382390SJung-uk Kim 				if (NUL_mapping && ccls[j] == 0)
143*7e382390SJung-uk Kim 					ccl_char = NUL_mapping;
144*7e382390SJung-uk Kim 				else
145*7e382390SJung-uk Kim 					ccl_char = ccls[j];
146*7e382390SJung-uk Kim 
147*7e382390SJung-uk Kim 				if (ccl_char > i)
148*7e382390SJung-uk Kim 					break;
149*7e382390SJung-uk Kim 
150*7e382390SJung-uk Kim 				if (ccl_char == i && !cclflags[j]) {
151*7e382390SJung-uk Kim 					/* We found an old companion of cclm
152*7e382390SJung-uk Kim 					 * in the ccl.  Link it into the new
153*7e382390SJung-uk Kim 					 * equivalence class and flag it as
154*7e382390SJung-uk Kim 					 * having been processed.
155*7e382390SJung-uk Kim 					 */
156*7e382390SJung-uk Kim 
157*7e382390SJung-uk Kim 					bck[i] = newec;
158*7e382390SJung-uk Kim 					fwd[newec] = i;
159*7e382390SJung-uk Kim 					newec = i;
160*7e382390SJung-uk Kim 					/* Set flag so we don't reprocess. */
161*7e382390SJung-uk Kim 					cclflags[j] = 1;
162*7e382390SJung-uk Kim 
163*7e382390SJung-uk Kim 					/* Get next equivalence class member. */
164*7e382390SJung-uk Kim 					/* continue 2 */
165*7e382390SJung-uk Kim 					goto next_pt;
166*7e382390SJung-uk Kim 				}
167*7e382390SJung-uk Kim 			}
168*7e382390SJung-uk Kim 
169*7e382390SJung-uk Kim 			/* Symbol isn't in character class.  Put it in the old
170*7e382390SJung-uk Kim 			 * equivalence class.
171*7e382390SJung-uk Kim 			 */
172*7e382390SJung-uk Kim 
173*7e382390SJung-uk Kim 			bck[i] = oldec;
174*7e382390SJung-uk Kim 
175*7e382390SJung-uk Kim 			if (oldec != NIL)
176*7e382390SJung-uk Kim 				fwd[oldec] = i;
177*7e382390SJung-uk Kim 
178*7e382390SJung-uk Kim 			oldec = i;
179*7e382390SJung-uk Kim 
180*7e382390SJung-uk Kim 		      next_pt:;
181*7e382390SJung-uk Kim 		}
182*7e382390SJung-uk Kim 
183*7e382390SJung-uk Kim 		if (bck[cclm] != NIL || oldec != bck[cclm]) {
184*7e382390SJung-uk Kim 			bck[cclm] = NIL;
185*7e382390SJung-uk Kim 			fwd[oldec] = NIL;
186*7e382390SJung-uk Kim 		}
187*7e382390SJung-uk Kim 
188*7e382390SJung-uk Kim 		fwd[newec] = NIL;
189*7e382390SJung-uk Kim 
190*7e382390SJung-uk Kim 		/* Find next ccl member to process. */
191*7e382390SJung-uk Kim 
192*7e382390SJung-uk Kim 		for (++cclp; cclp < lenccl && cclflags[cclp]; ++cclp) {
193*7e382390SJung-uk Kim 			/* Reset "doesn't need processing" flag. */
194*7e382390SJung-uk Kim 			cclflags[cclp] = 0;
195*7e382390SJung-uk Kim 		}
196*7e382390SJung-uk Kim 	}
197*7e382390SJung-uk Kim }
198*7e382390SJung-uk Kim 
199*7e382390SJung-uk Kim 
200*7e382390SJung-uk Kim /* mkechar - create equivalence class for single character */
201*7e382390SJung-uk Kim 
mkechar(int tch,int fwd[],int bck[])202*7e382390SJung-uk Kim void    mkechar (int tch, int fwd[], int bck[])
203*7e382390SJung-uk Kim {
204*7e382390SJung-uk Kim 	/* If until now the character has been a proper subset of
205*7e382390SJung-uk Kim 	 * an equivalence class, break it away to create a new ec
206*7e382390SJung-uk Kim 	 */
207*7e382390SJung-uk Kim 
208*7e382390SJung-uk Kim 	if (fwd[tch] != NIL)
209*7e382390SJung-uk Kim 		bck[fwd[tch]] = bck[tch];
210*7e382390SJung-uk Kim 
211*7e382390SJung-uk Kim 	if (bck[tch] != NIL)
212*7e382390SJung-uk Kim 		fwd[bck[tch]] = fwd[tch];
213*7e382390SJung-uk Kim 
214*7e382390SJung-uk Kim 	fwd[tch] = NIL;
215*7e382390SJung-uk Kim 	bck[tch] = NIL;
216*7e382390SJung-uk Kim }
217