1 /*- 2 * Copyright (c) 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 */ 7 8 #ifndef lint 9 static char sccsid[] = "@(#)str.c 5.6 (Berkeley) 01/16/92"; 10 #endif /* not lint */ 11 12 #include <sys/cdefs.h> 13 #include <sys/types.h> 14 #include <errno.h> 15 #include <stdio.h> 16 #include <stddef.h> 17 #include <stdlib.h> 18 #include <string.h> 19 #include "extern.h" 20 21 static int backslash __P((STR *)); 22 static int bracket __P((STR *)); 23 static int c_class __P((const void *, const void *)); 24 static void genclass __P((STR *)); 25 static void genequiv __P((STR *)); 26 static int genrange __P((STR *)); 27 static void genseq __P((STR *)); 28 29 int 30 next(s) 31 register STR *s; 32 { 33 register int ch; 34 35 switch (s->state) { 36 case EOS: 37 return (0); 38 case INFINITE: 39 return (1); 40 case NORMAL: 41 switch (ch = *s->str) { 42 case '\0': 43 s->state = EOS; 44 return (0); 45 case '\\': 46 s->lastch = backslash(s); 47 break; 48 case '[': 49 if (bracket(s)) 50 return (next(s)); 51 /* FALLTHROUGH */ 52 default: 53 ++s->str; 54 s->lastch = ch; 55 break; 56 } 57 58 /* We can start a range at any time. */ 59 if (s->str[0] == '-' && genrange(s)) 60 return (next(s)); 61 return (1); 62 case RANGE: 63 if (s->cnt-- == 0) { 64 s->state = NORMAL; 65 return (next(s)); 66 } 67 ++s->lastch; 68 return (1); 69 case SEQUENCE: 70 if (s->cnt-- == 0) { 71 s->state = NORMAL; 72 return (next(s)); 73 } 74 return (1); 75 case SET: 76 if ((s->lastch = s->set[s->cnt++]) == OOBCH) { 77 s->state = NORMAL; 78 return (next(s)); 79 } 80 return (1); 81 } 82 /* NOTREACHED */ 83 } 84 85 static int 86 bracket(s) 87 register STR *s; 88 { 89 register char *p; 90 91 switch (s->str[1]) { 92 case ':': /* "[:class:]" */ 93 if ((p = strstr(s->str + 2, ":]")) == NULL) 94 return (0); 95 *p = '\0'; 96 s->str += 2; 97 genclass(s); 98 s->str = p + 2; 99 return (1); 100 case '=': /* "[=equiv=]" */ 101 if ((p = strstr(s->str + 2, "=]")) == NULL) 102 return (0); 103 s->str += 2; 104 genequiv(s); 105 return (1); 106 default: /* "[\###*n]" or "[#*n]" */ 107 if ((p = strpbrk(s->str + 2, "*]")) == NULL) 108 return (0); 109 if (p[0] != '*' || index(p, ']') == NULL) 110 return (0); 111 s->str += 1; 112 genseq(s); 113 return (1); 114 } 115 /* NOTREACHED */ 116 } 117 118 int isalnum __P((int)), 119 isalpha __P((int)), 120 isblank __P((int)), 121 isspace __P((int)), 122 iscntrl __P((int)), 123 isdigit __P((int)), 124 isgraph __P((int)), 125 islower __P((int)), 126 isprint __P((int)), 127 ispunct __P((int)), 128 isupper __P((int)), 129 isxdigit __P((int)); 130 131 typedef struct { 132 char *name; 133 int (*func) __P((int)); 134 int *set; 135 } CLASS; 136 137 static CLASS classes[] = { 138 { "alnum", isalnum, }, 139 { "alpha", isalpha, }, 140 { "blank", isblank, }, 141 { "cntrl", iscntrl, }, 142 { "digit", isdigit, }, 143 { "graph", isgraph, }, 144 { "lower", islower, }, 145 { "print", isupper, }, 146 { "punct", ispunct, }, 147 { "space", isspace, }, 148 { "upper", isupper, }, 149 { "xdigit", isxdigit, }, 150 }; 151 152 static void 153 genclass(s) 154 STR *s; 155 { 156 register int cnt, (*func) __P((int)); 157 CLASS *cp, tmp; 158 int *p; 159 160 tmp.name = s->str; 161 if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / 162 sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) 163 err("unknown class %s", s->str); 164 165 if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) 166 err("%s", strerror(errno)); 167 bzero(p, (NCHARS + 1) * sizeof(int)); 168 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt) 169 if ((func)(cnt)) 170 *p++ = cnt; 171 *p = OOBCH; 172 173 s->cnt = 0; 174 s->state = SET; 175 s->set = cp->set; 176 } 177 178 static int 179 c_class(a, b) 180 const void *a, *b; 181 { 182 return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name)); 183 } 184 185 /* 186 * English doesn't have any equivalence classes, so for now 187 * we just syntax check and grab the character. 188 */ 189 static void 190 genequiv(s) 191 STR *s; 192 { 193 if (*s->str == '\\') { 194 s->equiv[0] = backslash(s); 195 if (*s->str != '=') 196 err("misplaced equivalence equals sign"); 197 } else { 198 s->equiv[0] = s->str[0]; 199 if (s->str[1] != '=') 200 err("misplaced equivalence equals sign"); 201 } 202 s->str += 2; 203 s->cnt = 0; 204 s->state = SET; 205 s->set = s->equiv; 206 } 207 208 static int 209 genrange(s) 210 STR *s; 211 { 212 int stopval; 213 char *savestart; 214 215 savestart = s->str; 216 stopval = *++s->str == '\\' ? backslash(s) : *s->str; 217 if (stopval < s->lastch) { 218 s->str = savestart; 219 return (0); 220 } 221 s->cnt = stopval - s->lastch + 1; 222 s->state = RANGE; 223 --s->lastch; 224 return (1); 225 } 226 227 static void 228 genseq(s) 229 STR *s; 230 { 231 char *ep; 232 233 if (s->which == STRING1) 234 err("sequences only valid in string2"); 235 236 if (*s->str == '\\') 237 s->lastch = backslash(s); 238 else 239 s->lastch = *s->str++; 240 if (*s->str != '*') 241 err("misplaced sequence asterisk"); 242 243 switch (*++s->str) { 244 case '\\': 245 s->cnt = backslash(s); 246 break; 247 case ']': 248 s->cnt = 0; 249 ++s->str; 250 break; 251 default: 252 if (isdigit(*s->str)) { 253 s->cnt = strtol(s->str, &ep, 0); 254 if (*ep == ']') { 255 s->str = ep + 1; 256 break; 257 } 258 } 259 err("illegal sequence count"); 260 /* NOTREACHED */ 261 } 262 263 s->state = s->cnt ? SEQUENCE : INFINITE; 264 } 265 266 /* Use the #defines isXXX() here, DON'T use them above. */ 267 #include <ctype.h> 268 269 /* 270 * Translate \??? into a character. Up to 3 octal digits, if no digits either 271 * an escape code or a literal character. 272 */ 273 static int 274 backslash(s) 275 register STR *s; 276 { 277 register int ch, cnt, val; 278 279 for (cnt = val = 0;;) { 280 ch = *++s->str; 281 if (!isascii(ch) || !isdigit(ch)) 282 break; 283 val = val * 8 + ch - '0'; 284 if (++cnt == 3) 285 break; 286 } 287 if (ch != '\0') 288 ++s->str; 289 if (cnt) 290 return (val); 291 switch (ch) { 292 case 'a': /* escape characters */ 293 return ('\7'); 294 case 'b': 295 return ('\b'); 296 case 'f': 297 return ('\f'); 298 case 'n': 299 return ('\n'); 300 case 'r': 301 return ('\r'); 302 case 't': 303 return ('\t'); 304 case 'v': 305 return ('\13'); 306 case '\0': /* \" -> \ */ 307 s->state = EOS; 308 return ('\\'); 309 default: /* \x" -> x */ 310 return (ch); 311 } 312 } 313