1 /* @(#)gmatch.c	1.23 21/02/24 2008-2021 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 	"@(#)gmatch.c	1.23 21/02/24 2008-2021 J. Schilling";
6 #endif
7 
8 #include <schily/mconfig.h>
9 
10 #ifdef	MY_GMATCH			/* #define to enforce this gmatch() */
11 #undef	HAVE_GMATCH			/* instead of gmatch() from -lgen   */
12 #endif
13 #ifdef	DO_POSIX_GMATCH			/* POSIX features like [:alpha:]    */
14 #undef	HAVE_GMATCH			/* need this gmatch()		    */
15 #endif
16 #ifndef	HAVE_GMATCH
17 
18 #include	<schily/limits.h>	/* MB_LEN_MAX */
19 #include	<schily/wchar.h>	/* includes stdio.h */
20 #include	<schily/wctype.h>	/* needed before we use wchar_t */
21 
22 /* -------- gmatch.c -------- */
23 /*
24  * int gmatch(string, pattern)
25  * char *string, *pattern;
26  *
27  * Match a pattern as in sh(1).
28  *
29  * This version is under BSD license.
30  * Originally written by Andrzej Bialecki <abial@FreeBSD.org>
31  * Rewritten for multi-byte support (c) 2012 by J. Schilling
32  * Rewritten to avoid recursion using the concepts from
33  * https://research.swtch.com/glob by J. Schilling
34  * Added POSIX pattern support (c) 2017 by J. Schilling
35  */
36 
37 #ifndef	NULL
38 #define	NULL	0
39 #endif
40 #define	NOT	'!'	/* might use ^ */
41 
42 static int cclass	__PR((const char *p, int sub, char **ret));
43 	int gmatch	__PR((const char *s, const char *p));
44 
/*
 * nextwc(p, c): fetch the next character from the multi-byte string p
 * into c and advance p behind it.
 *
 * The macro relies on two locals that must exist in the calling function:
 *	wchar_t	lc;	receives the decoded wide character
 *	int	n;	receives the number of bytes consumed
 *
 * If the bytes at p do not form a valid multi-byte character, the single
 * byte at p is delivered as an unsigned value, one byte is consumed and
 * mbtowc() is called with a NULL string to put it back into its initial
 * conversion state. At the terminating null byte, c becomes 0 and p is
 * not advanced because mbtowc() returns 0 in that case.
 *
 * The body is wrapped in do { } while (0) so that "nextwc(p, c);" is a
 * single statement, and lc is only read after mbtowc() has stored into it.
 */
#define	nextwc(p, c) \
		do { \
			n = mbtowc(&lc, (p), MB_LEN_MAX); \
			if (n < 0) { \
				(c) = *(const unsigned char *)(p); \
				n = 1; \
				(void) mbtowc(NULL, NULL, 0); \
			} else { \
				(c) = lc; \
			} \
			(p) += n; \
		} while (0)
54 
/*
 * Result codes returned by cclass().
 */
enum {
	CL_ERR		= 0,	/* Error in pattern			*/
	CL_MATCH	= 1,	/* Range OK, and match			*/
	CL_NOMATCH	= 2	/* Range OK, but no match		*/
};

/*
 * Limit for the name inside a POSIX '[: :]' character class.
 */
enum {
	CL_SIZE		= 32	/* Max size for '[: :]'			*/
};
60 
61 static int
cclass(p,sub,ret)62 cclass(p, sub, ret)
63 	register const char	*p;
64 	register int		sub;
65 		char		**ret;
66 {
67 	register int	c, d, found;
68 	register int	not = 0;
69 		wchar_t	lc;
70 		int	n;
71 
72 	if ((n = mbtowc(&lc, p, MB_LEN_MAX)) < 0) {
73 		lc = *(const unsigned char *)p++;
74 		(void) mbtowc(NULL, NULL, 0);
75 	} else if ((not = (lc == NOT)) != 0) {
76 		p += n;
77 	}
78 	found = not;
79 	do {
80 		if (*p == '\0')
81 			return (0);
82 
83 		nextwc(p, c);
84 		if (c == '\\') {
85 			nextwc(p, c);
86 		}
87 		if ((n = mbtowc(&lc, p, MB_LEN_MAX)) < 0) {	/* peek lc */
88 			lc = *(const unsigned char *)p;
89 			n = 1;
90 			(void) mbtowc(NULL, NULL, 0);
91 		}
92 #ifdef	DO_POSIX_GMATCH
93 		if (c == '[') {
94 			if (lc == ':') {
95 				char	class[CL_SIZE+1];
96 				char	*pc = class;
97 
98 				p += n;		/* Eat ':' */
99 				for (;;) {
100 					if (*p == '\0')
101 						return (0);
102 					if (*p == ':' && p[1] == ']')
103 						break;
104 					if (pc >= &class[CL_SIZE])
105 						return (0);
106 					*pc++ = *p++;
107 				}
108 				if (pc == class)
109 					return (0);
110 				*pc = '\0';
111 				p += 2;		/* Skip ":]" */
112 				if (iswctype(sub, wctype(class)))
113 					found = !not;
114 				if (*p == ']')	/* End of class */
115 					break;	/* parsing complete */
116 				continue;
117 			}
118 		}
119 #endif	/* DO_POSIX_GMATCH */
120 		if (lc == '-' && p[n] != ']') {
121 			p += n;			/* eat up lc peeked above */
122 			nextwc(p, d);
123 			if (d == '\\') {
124 				nextwc(p, d);
125 			}
126 			if ((n = mbtowc(&lc, p, MB_LEN_MAX)) < 0) {
127 				lc = *(const unsigned char *)p;
128 				n = 1;
129 				(void) mbtowc(NULL, NULL, 0);
130 			}
131 		} else {
132 			d = c;
133 		}
134 		if (c == sub || (c <= sub && sub <= d))
135 			found = !not;
136 	} while (lc != ']');
137 	*ret = (char *)p+n;			/* add len for '[' */
138 	return (found? CL_MATCH : CL_NOMATCH);
139 }
140 
141 int
gmatch(s,p)142 gmatch(s, p)
143 	register const char	*s;	/* The string to match	*/
144 	register const char	*p;	/* The pattern		*/
145 {
146 		const char *os;
147 		const char *bt_s;
148 		const char *bt_p;
149 	register wchar_t sc;
150 	register wchar_t pc;
151 		wchar_t	lc;
152 		int	n;
153 
154 	if (s == NULL || p == NULL)
155 		return (0);
156 
157 	bt_p = bt_s = NULL;
158 	while (*p != '\0') {
159 		os = s;
160 		nextwc(s, sc);
161 again:
162 		nextwc(p, pc);
163 
164 		switch (pc) {
165 		case '[': {
166 			char *p2;
167 
168 			if (sc == 0)
169 				return (0);
170 
171 			switch (cclass(p, sc, &p2)) {
172 			case CL_ERR:
173 #ifdef	GMATCH_CLERR_NORM
174 				goto def;
175 #endif
176 			case CL_NOMATCH:
177 				goto backtrack;
178 			}
179 
180 			p = p2;
181 			break;
182 		}
183 		case '?':
184 			if (sc == 0)
185 				return (0);
186 			break;
187 
188 		case '*':
189 			while (*p == '*')
190 				p++;
191 			if (*p == '\0')
192 				return (1);
193 
194 			bt_p = p;
195 			bt_s = os;
196 			goto again;
197 
198 		case '\\':
199 			nextwc(p, pc);
200 			if (pc == 0)
201 				return (0);
202 			/* FALLTROUGH */
203 
204 #ifdef	GMATCH_CLERR_NORM
205 		def:
206 #endif
207 		default:
208 			if (sc == pc) {
209 				;
210 			} else {
211 backtrack:
212 				if (bt_p == NULL)
213 					return (0);
214 				if (*bt_s == '\0')
215 					return (0);
216 
217 				nextwc(bt_s, sc);
218 				if (sc == '\0')
219 					return (0);
220 				p = bt_p;
221 				s = bt_s;
222 			}
223 		}
224 	}
225 	if (*s != 0)
226 		goto backtrack;
227 	return (1);
228 }
229 
230 #endif	/* HAVE_GMATCH */
231