xref: /openbsd/usr.bin/lex/ecs.c (revision 8880fbfe)
1 /*	$OpenBSD: ecs.c,v 1.9 2015/11/20 18:54:49 tedu Exp $	*/
2 
3 /* ecs - equivalence class routines */
4 
5 /*  Copyright (c) 1990 The Regents of the University of California. */
6 /*  All rights reserved. */
7 
8 /*  This code is derived from software contributed to Berkeley by */
9 /*  Vern Paxson. */
10 
11 /*  The United States Government has rights in this work pursuant */
12 /*  to contract no. DE-AC03-76SF00098 between the United States */
13 /*  Department of Energy and the University of California. */
14 
15 /* This file is part of flex */
16 
17 /*  Redistribution and use in source and binary forms, with or without */
18 /*  modification, are permitted provided that the following conditions */
19 /*  are met: */
20 
21 /*  1. Redistributions of source code must retain the above copyright */
22 /*     notice, this list of conditions and the following disclaimer. */
23 /*  2. Redistributions in binary form must reproduce the above copyright */
24 /*     notice, this list of conditions and the following disclaimer in the */
25 /*     documentation and/or other materials provided with the distribution. */
26 
27 /*  Neither the name of the University nor the names of its contributors */
28 /*  may be used to endorse or promote products derived from this software */
29 /*  without specific prior written permission. */
30 
31 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
32 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
33 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
34 /*  PURPOSE. */
35 
36 
37 #include "flexdef.h"
38 
39 /* ccl2ecl - convert character classes to set of equivalence classes */
40 
41 void
ccl2ecl(void)42 ccl2ecl(void)
43 {
44 	int i, ich, newlen, cclp, ccls, cclmec;
45 
46 	for (i = 1; i <= lastccl; ++i) {
47 		/*
48 		 * We loop through each character class, and for each
49 		 * character in the class, add the character's equivalence
50 		 * class to the new "character" class we are creating.  Thus
51 		 * when we are all done, character classes will really
52 		 * consist of collections of equivalence classes
53 		 */
54 
55 		newlen = 0;
56 		cclp = cclmap[i];
57 
58 		for (ccls = 0; ccls < ccllen[i]; ++ccls) {
59 			ich = ccltbl[cclp + ccls];
60 			cclmec = ecgroup[ich];
61 
62 			if (cclmec > 0) {
63 				ccltbl[cclp + newlen] = cclmec;
64 				++newlen;
65 			}
66 		}
67 
68 		ccllen[i] = newlen;
69 	}
70 }
71 
72 
73 /* cre8ecs - associate equivalence class numbers with class members
74  *
75  * fwd is the forward linked-list of equivalence class members.  bck
76  * is the backward linked-list, and num is the number of class members.
77  *
78  * Returned is the number of classes.
79  */
80 
81 int
cre8ecs(int * fwd,int * bck,int num)82 cre8ecs(int *fwd, int *bck, int num)
83 {
84 	int i, j, numcl;
85 
86 	numcl = 0;
87 
88 	/*
89 	 * Create equivalence class numbers.  From now on, ABS( bck(x) ) is
90 	 * the equivalence class number for object x.  If bck(x) is positive,
91 	 * then x is the representative of its equivalence class.
92 	 */
93 	for (i = 1; i <= num; ++i)
94 		if (bck[i] == NIL) {
95 			bck[i] = ++numcl;
96 			for (j = fwd[i]; j != NIL; j = fwd[j])
97 				bck[j] = -numcl;
98 		}
99 	return numcl;
100 }
101 
102 
103 /* mkeccl - update equivalence classes based on character class xtions
104  *
105  * synopsis
106  *    u_char ccls[];
107  *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
108  *    void mkeccl( u_char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz],
109  *			int llsiz, int NUL_mapping );
110  *
111  * ccls contains the elements of the character class, lenccl is the
112  * number of elements in the ccl, fwd is the forward link-list of equivalent
113  * characters, bck is the backward link-list, and llsiz size of the link-list.
114  *
115  * NUL_mapping is the value which NUL (0) should be mapped to.
116  */
117 
118 void
mkeccl(u_char * ccls,int lenccl,int * fwd,int * bck,int llsiz,int NUL_mapping)119 mkeccl(u_char *ccls, int lenccl, int *fwd, int *bck, int llsiz, int NUL_mapping)
120 {
121 	int cclp, oldec, newec;
122 	int cclm, i, j;
123 	static unsigned char cclflags[CSIZE];	/* initialized to all '\0' */
124 
125 	/*
126 	 * Note that it doesn't matter whether or not the character class is
127 	 * negated.  The same results will be obtained in either case.
128 	 */
129 
130 	cclp = 0;
131 
132 	while (cclp < lenccl) {
133 		cclm = ccls[cclp];
134 
135 		if (NUL_mapping && cclm == 0)
136 			cclm = NUL_mapping;
137 
138 		oldec = bck[cclm];
139 		newec = cclm;
140 
141 		j = cclp + 1;
142 
143 		for (i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i]) {
144 			/* look for the symbol in the character class */
145 			for (; j < lenccl; ++j) {
146 				int ccl_char;
147 
148 				if (NUL_mapping && ccls[j] == 0)
149 					ccl_char = NUL_mapping;
150 				else
151 					ccl_char = ccls[j];
152 
153 				if (ccl_char > i)
154 					break;
155 
156 				if (ccl_char == i && !cclflags[j]) {
157 					/*
158 					 * We found an old companion of cclm
159 					 * in the ccl.  Link it into the new
160 					 * equivalence class and flag it as
161 					 * having been processed.
162 					 */
163 
164 					bck[i] = newec;
165 					fwd[newec] = i;
166 					newec = i;
167 					/* Set flag so we don't reprocess. */
168 					cclflags[j] = 1;
169 
170 					/* Get next equivalence class member. */
171 					/* continue 2 */
172 					goto next_pt;
173 				}
174 			}
175 
176 			/*
177 			 * Symbol isn't in character class.  Put it in the
178 			 * old equivalence class.
179 			 */
180 
181 			bck[i] = oldec;
182 
183 			if (oldec != NIL)
184 				fwd[oldec] = i;
185 
186 			oldec = i;
187 
188 	next_pt:	;
189 		}
190 
191 		if (bck[cclm] != NIL || oldec != bck[cclm]) {
192 			bck[cclm] = NIL;
193 			fwd[oldec] = NIL;
194 		}
195 		fwd[newec] = NIL;
196 
197 		/* Find next ccl member to process. */
198 
199 		for (++cclp; cclflags[cclp] && cclp < lenccl; ++cclp) {
200 			/* Reset "doesn't need processing" flag. */
201 			cclflags[cclp] = 0;
202 		}
203 	}
204 }
205 
206 
207 /* mkechar - create equivalence class for single character */
208 
209 void
mkechar(int tch,int * fwd,int * bck)210 mkechar(int tch, int *fwd, int *bck)
211 {
212 	/*
213 	 * If until now the character has been a proper subset of an
214 	 * equivalence class, break it away to create a new ec
215 	 */
216 
217 	if (fwd[tch] != NIL)
218 		bck[fwd[tch]] = bck[tch];
219 
220 	if (bck[tch] != NIL)
221 		fwd[bck[tch]] = fwd[tch];
222 
223 	fwd[tch] = NIL;
224 	bck[tch] = NIL;
225 }
226