xref: /openbsd/usr.bin/lex/ecs.c (revision 8880fbfe)
1*8880fbfeStedu /*	$OpenBSD: ecs.c,v 1.9 2015/11/20 18:54:49 tedu Exp $	*/
21258a77dSderaadt 
3df930be7Sderaadt /* ecs - equivalence class routines */
4df930be7Sderaadt 
5a58c1ecbStedu /*  Copyright (c) 1990 The Regents of the University of California. */
6a58c1ecbStedu /*  All rights reserved. */
7df930be7Sderaadt 
8a58c1ecbStedu /*  This code is derived from software contributed to Berkeley by */
9a58c1ecbStedu /*  Vern Paxson. */
10a58c1ecbStedu 
11a58c1ecbStedu /*  The United States Government has rights in this work pursuant */
12a58c1ecbStedu /*  to contract no. DE-AC03-76SF00098 between the United States */
13a58c1ecbStedu /*  Department of Energy and the University of California. */
14a58c1ecbStedu 
15a58c1ecbStedu /* This file is part of flex */
16a58c1ecbStedu 
17a58c1ecbStedu /*  Redistribution and use in source and binary forms, with or without */
18a58c1ecbStedu /*  modification, are permitted provided that the following conditions */
19a58c1ecbStedu /*  are met: */
20a58c1ecbStedu 
21a58c1ecbStedu /*  1. Redistributions of source code must retain the above copyright */
22a58c1ecbStedu /*     notice, this list of conditions and the following disclaimer. */
23a58c1ecbStedu /*  2. Redistributions in binary form must reproduce the above copyright */
24a58c1ecbStedu /*     notice, this list of conditions and the following disclaimer in the */
25a58c1ecbStedu /*     documentation and/or other materials provided with the distribution. */
26a58c1ecbStedu 
27a58c1ecbStedu /*  Neither the name of the University nor the names of its contributors */
28a58c1ecbStedu /*  may be used to endorse or promote products derived from this software */
29a58c1ecbStedu /*  without specific prior written permission. */
30a58c1ecbStedu 
31a58c1ecbStedu /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
32a58c1ecbStedu /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
33a58c1ecbStedu /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
34a58c1ecbStedu /*  PURPOSE. */
35a58c1ecbStedu 
36df930be7Sderaadt 
37df930be7Sderaadt #include "flexdef.h"
38df930be7Sderaadt 
39df930be7Sderaadt /* ccl2ecl - convert character classes to set of equivalence classes */
40df930be7Sderaadt 
41*8880fbfeStedu void
ccl2ecl(void)42*8880fbfeStedu ccl2ecl(void)
43df930be7Sderaadt {
44df930be7Sderaadt 	int i, ich, newlen, cclp, ccls, cclmec;
45df930be7Sderaadt 
46a58c1ecbStedu 	for (i = 1; i <= lastccl; ++i) {
47*8880fbfeStedu 		/*
48*8880fbfeStedu 		 * We loop through each character class, and for each
49*8880fbfeStedu 		 * character in the class, add the character's equivalence
50*8880fbfeStedu 		 * class to the new "character" class we are creating.  Thus
51*8880fbfeStedu 		 * when we are all done, character classes will really
52*8880fbfeStedu 		 * consist of collections of equivalence classes
53df930be7Sderaadt 		 */
54df930be7Sderaadt 
55df930be7Sderaadt 		newlen = 0;
56df930be7Sderaadt 		cclp = cclmap[i];
57df930be7Sderaadt 
58a58c1ecbStedu 		for (ccls = 0; ccls < ccllen[i]; ++ccls) {
59df930be7Sderaadt 			ich = ccltbl[cclp + ccls];
60df930be7Sderaadt 			cclmec = ecgroup[ich];
61df930be7Sderaadt 
62a58c1ecbStedu 			if (cclmec > 0) {
63df930be7Sderaadt 				ccltbl[cclp + newlen] = cclmec;
64df930be7Sderaadt 				++newlen;
65df930be7Sderaadt 			}
66df930be7Sderaadt 		}
67df930be7Sderaadt 
68df930be7Sderaadt 		ccllen[i] = newlen;
69df930be7Sderaadt 	}
70df930be7Sderaadt }
71df930be7Sderaadt 
72df930be7Sderaadt 
73df930be7Sderaadt /* cre8ecs - associate equivalence class numbers with class members
74df930be7Sderaadt  *
75df930be7Sderaadt  * fwd is the forward linked-list of equivalence class members.  bck
76df930be7Sderaadt  * is the backward linked-list, and num is the number of class members.
77df930be7Sderaadt  *
78df930be7Sderaadt  * Returned is the number of classes.
79df930be7Sderaadt  */
80df930be7Sderaadt 
81*8880fbfeStedu int
cre8ecs(int * fwd,int * bck,int num)82*8880fbfeStedu cre8ecs(int *fwd, int *bck, int num)
83df930be7Sderaadt {
84df930be7Sderaadt 	int i, j, numcl;
85df930be7Sderaadt 
86df930be7Sderaadt 	numcl = 0;
87df930be7Sderaadt 
88*8880fbfeStedu 	/*
89*8880fbfeStedu 	 * Create equivalence class numbers.  From now on, ABS( bck(x) ) is
90*8880fbfeStedu 	 * the equivalence class number for object x.  If bck(x) is positive,
91*8880fbfeStedu 	 * then x is the representative of its equivalence class.
92df930be7Sderaadt 	 */
93df930be7Sderaadt 	for (i = 1; i <= num; ++i)
94a58c1ecbStedu 		if (bck[i] == NIL) {
95df930be7Sderaadt 			bck[i] = ++numcl;
96df930be7Sderaadt 			for (j = fwd[i]; j != NIL; j = fwd[j])
97df930be7Sderaadt 				bck[j] = -numcl;
98df930be7Sderaadt 		}
99df930be7Sderaadt 	return numcl;
100df930be7Sderaadt }
101df930be7Sderaadt 
102df930be7Sderaadt 
103df930be7Sderaadt /* mkeccl - update equivalence classes based on character class xtions
104df930be7Sderaadt  *
105df930be7Sderaadt  * synopsis
10673902435Smmcc  *    u_char ccls[];
107df930be7Sderaadt  *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
10873902435Smmcc  *    void mkeccl( u_char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz],
109df930be7Sderaadt  *			int llsiz, int NUL_mapping );
110df930be7Sderaadt  *
111df930be7Sderaadt  * ccls contains the elements of the character class, lenccl is the
112df930be7Sderaadt  * number of elements in the ccl, fwd is the forward link-list of equivalent
113df930be7Sderaadt  * characters, bck is the backward link-list, and llsiz size of the link-list.
114df930be7Sderaadt  *
115df930be7Sderaadt  * NUL_mapping is the value which NUL (0) should be mapped to.
116df930be7Sderaadt  */
117df930be7Sderaadt 
118*8880fbfeStedu void
mkeccl(u_char * ccls,int lenccl,int * fwd,int * bck,int llsiz,int NUL_mapping)119*8880fbfeStedu mkeccl(u_char *ccls, int lenccl, int *fwd, int *bck, int llsiz, int NUL_mapping)
120df930be7Sderaadt {
121df930be7Sderaadt 	int cclp, oldec, newec;
122df930be7Sderaadt 	int cclm, i, j;
123df930be7Sderaadt 	static unsigned char cclflags[CSIZE];	/* initialized to all '\0' */
124df930be7Sderaadt 
125*8880fbfeStedu 	/*
126*8880fbfeStedu 	 * Note that it doesn't matter whether or not the character class is
127df930be7Sderaadt 	 * negated.  The same results will be obtained in either case.
128df930be7Sderaadt 	 */
129df930be7Sderaadt 
130df930be7Sderaadt 	cclp = 0;
131df930be7Sderaadt 
132a58c1ecbStedu 	while (cclp < lenccl) {
133df930be7Sderaadt 		cclm = ccls[cclp];
134df930be7Sderaadt 
135df930be7Sderaadt 		if (NUL_mapping && cclm == 0)
136df930be7Sderaadt 			cclm = NUL_mapping;
137df930be7Sderaadt 
138df930be7Sderaadt 		oldec = bck[cclm];
139df930be7Sderaadt 		newec = cclm;
140df930be7Sderaadt 
141df930be7Sderaadt 		j = cclp + 1;
142df930be7Sderaadt 
143*8880fbfeStedu 		for (i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i]) {
144*8880fbfeStedu 			/* look for the symbol in the character class */
145a58c1ecbStedu 			for (; j < lenccl; ++j) {
146c0932ef1Smpech 				int ccl_char;
147df930be7Sderaadt 
148df930be7Sderaadt 				if (NUL_mapping && ccls[j] == 0)
149df930be7Sderaadt 					ccl_char = NUL_mapping;
150df930be7Sderaadt 				else
151df930be7Sderaadt 					ccl_char = ccls[j];
152df930be7Sderaadt 
153df930be7Sderaadt 				if (ccl_char > i)
154df930be7Sderaadt 					break;
155df930be7Sderaadt 
156a58c1ecbStedu 				if (ccl_char == i && !cclflags[j]) {
157*8880fbfeStedu 					/*
158*8880fbfeStedu 					 * We found an old companion of cclm
159df930be7Sderaadt 					 * in the ccl.  Link it into the new
160df930be7Sderaadt 					 * equivalence class and flag it as
161df930be7Sderaadt 					 * having been processed.
162df930be7Sderaadt 					 */
163df930be7Sderaadt 
164df930be7Sderaadt 					bck[i] = newec;
165df930be7Sderaadt 					fwd[newec] = i;
166df930be7Sderaadt 					newec = i;
167df930be7Sderaadt 					/* Set flag so we don't reprocess. */
168df930be7Sderaadt 					cclflags[j] = 1;
169df930be7Sderaadt 
170df930be7Sderaadt 					/* Get next equivalence class member. */
171df930be7Sderaadt 					/* continue 2 */
172df930be7Sderaadt 					goto next_pt;
173df930be7Sderaadt 				}
174df930be7Sderaadt 			}
175df930be7Sderaadt 
176*8880fbfeStedu 			/*
177*8880fbfeStedu 			 * Symbol isn't in character class.  Put it in the
178*8880fbfeStedu 			 * old equivalence class.
179df930be7Sderaadt 			 */
180df930be7Sderaadt 
181df930be7Sderaadt 			bck[i] = oldec;
182df930be7Sderaadt 
183df930be7Sderaadt 			if (oldec != NIL)
184df930be7Sderaadt 				fwd[oldec] = i;
185df930be7Sderaadt 
186df930be7Sderaadt 			oldec = i;
187df930be7Sderaadt 
188df930be7Sderaadt 	next_pt:	;
189df930be7Sderaadt 		}
190df930be7Sderaadt 
191a58c1ecbStedu 		if (bck[cclm] != NIL || oldec != bck[cclm]) {
192df930be7Sderaadt 			bck[cclm] = NIL;
193df930be7Sderaadt 			fwd[oldec] = NIL;
194df930be7Sderaadt 		}
195df930be7Sderaadt 		fwd[newec] = NIL;
196df930be7Sderaadt 
197df930be7Sderaadt 		/* Find next ccl member to process. */
198df930be7Sderaadt 
199a58c1ecbStedu 		for (++cclp; cclflags[cclp] && cclp < lenccl; ++cclp) {
200df930be7Sderaadt 			/* Reset "doesn't need processing" flag. */
201df930be7Sderaadt 			cclflags[cclp] = 0;
202df930be7Sderaadt 		}
203df930be7Sderaadt 	}
204df930be7Sderaadt }
205df930be7Sderaadt 
206df930be7Sderaadt 
207df930be7Sderaadt /* mkechar - create equivalence class for single character */
208df930be7Sderaadt 
209*8880fbfeStedu void
mkechar(int tch,int * fwd,int * bck)210*8880fbfeStedu mkechar(int tch, int *fwd, int *bck)
211df930be7Sderaadt {
212*8880fbfeStedu 	/*
213*8880fbfeStedu 	 * If until now the character has been a proper subset of an
214*8880fbfeStedu 	 * equivalence class, break it away to create a new ec
215df930be7Sderaadt 	 */
216df930be7Sderaadt 
217df930be7Sderaadt 	if (fwd[tch] != NIL)
218df930be7Sderaadt 		bck[fwd[tch]] = bck[tch];
219df930be7Sderaadt 
220df930be7Sderaadt 	if (bck[tch] != NIL)
221df930be7Sderaadt 		fwd[bck[tch]] = fwd[tch];
222df930be7Sderaadt 
223df930be7Sderaadt 	fwd[tch] = NIL;
224df930be7Sderaadt 	bck[tch] = NIL;
225df930be7Sderaadt }
226