1 /*- 2 * Copyright (c) 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 */ 7 8 #ifndef lint 9 static char sccsid[] = "@(#)str.c 5.3 (Berkeley) 10/27/91"; 10 #endif /* not lint */ 11 12 #include <sys/cdefs.h> 13 #include <sys/types.h> 14 #include <errno.h> 15 #include <stdio.h> 16 #include <stddef.h> 17 #include <stdlib.h> 18 #include <string.h> 19 #include "extern.h" 20 21 static int backslash __P((STR *)); 22 static int bracket __P((STR *)); 23 static int c_class __P((const void *, const void *)); 24 static void genclass __P((STR *)); 25 static void genequiv __P((STR *)); 26 static int genrange __P((STR *)); 27 static void genseq __P((STR *)); 28 29 int 30 next(s) 31 register STR *s; 32 { 33 register int ch; 34 35 switch (s->state) { 36 case EOS: 37 return (0); 38 case INFINITE: 39 return (1); 40 case NORMAL: 41 switch (ch = *s->str) { 42 case '\0': 43 s->state = EOS; 44 return (0); 45 case '\\': 46 s->lastch = backslash(s); 47 break; 48 case '[': 49 if (bracket(s)) 50 return (next(s)); 51 /* FALLTHROUGH */ 52 default: 53 ++s->str; 54 s->lastch = ch; 55 break; 56 } 57 58 /* We can start a range at any time. */ 59 if (s->str[0] == '-' && genrange(s)) 60 return (next(s)); 61 return (1); 62 case RANGE: 63 if (s->cnt-- == 0) { 64 s->state = NORMAL; 65 return (next(s)); 66 } 67 ++s->lastch; 68 return (1); 69 case SEQUENCE: 70 if (s->cnt-- == 0) { 71 s->state = NORMAL; 72 return (next(s)); 73 } 74 return (1); 75 case SET: 76 if ((s->lastch = s->set[s->cnt++]) == OOBCH) { 77 s->state = NORMAL; 78 return (next(s)); 79 } 80 return (1); 81 } 82 /* NOTREACHED */ 83 } 84 85 static int 86 bracket(s) 87 register STR *s; 88 { 89 register char *p; 90 91 switch (*++s->str) { 92 case ':': /* "[:class:]" */ 93 if ((p = strpbrk(s->str + 1, ":]")) == NULL) 94 return (0); 95 if (p[0] != ':' || p[1] != ']') 96 return (0); 97 *p = '\0'; 98 ++s->str; 99 genclass(s); 100 s->str = p + 2; 101 return (1); 102 case '=': /* "[=equiv=]" */ 103 if ((p = strpbrk(s->str + 1, "=]")) == NULL) 104 return (0); 105 if (p[0] != '=' || p[1] != ']') 106 return (0); 107 genequiv(s); 108 return (1); 109 default: /* "[\###*]" or "[#*]" */ 110 if ((p = strpbrk(s->str + 1, "*]")) == NULL) 111 return (0); 112 if (p[0] != '*' || index(p, ']') == NULL) 113 return (0); 114 genseq(s); 115 return (1); 116 } 117 /* NOTREACHED */ 118 } 119 120 int isalnum __P((int)), 121 isalpha __P((int)), 122 isblank __P((int)), 123 isspace __P((int)), 124 iscntrl __P((int)), 125 isdigit __P((int)), 126 isgraph __P((int)), 127 islower __P((int)), 128 isprint __P((int)), 129 ispunct __P((int)), 130 isupper __P((int)), 131 isxdigit __P((int)); 132 133 typedef struct { 134 char *name; 135 int (*func) __P((int)); 136 int *set; 137 } CLASS; 138 139 static CLASS classes[] = { 140 { "alnum", isalnum, }, 141 { "alpha", isalpha, }, 142 { "blank", isblank, }, 143 { "cntrl", iscntrl, }, 144 { "digit", isdigit, }, 145 { "graph", isgraph, }, 146 { "lower", islower, }, 147 { "print", isupper, }, 148 { "punct", ispunct, }, 149 { "space", isspace, }, 150 { "upper", isupper, }, 151 { "xdigit", isxdigit, }, 152 }; 153 154 static void 155 genclass(s) 156 STR *s; 157 { 158 register int cnt, (*func) __P((int)); 159 CLASS *cp, tmp; 160 int *p; 161 162 tmp.name = s->str; 163 if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / 164 sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) 165 err("unknown class %s", s->str); 166 167 if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) 168 err("%s", strerror(errno)); 169 bzero(p, (NCHARS + 1) * sizeof(int)); 170 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt) 171 if ((func)(cnt)) 172 *p++ = cnt; 173 *p = OOBCH; 174 175 s->cnt = 0; 176 s->state = SET; 177 s->set = cp->set; 178 } 179 180 static int 181 c_class(a, b) 182 const void *a, *b; 183 { 184 return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name)); 185 } 186 187 /* 188 * English doesn't have any equivalence classes, so for now 189 * we just syntax check and grab the character. 190 */ 191 static void 192 genequiv(s) 193 STR *s; 194 { 195 if (*++s->str == '\\') { 196 s->equiv[0] = backslash(s); 197 if (*s->str != '=') 198 err("misplaced equivalence equals sign"); 199 } else { 200 s->equiv[0] = s->str[0]; 201 if (s->str[1] != '=') 202 err("misplaced equivalence equals sign"); 203 } 204 s->str += 2; 205 s->cnt = 0; 206 s->state = SET; 207 s->set = s->equiv; 208 } 209 210 static int 211 genrange(s) 212 STR *s; 213 { 214 int stopval; 215 char *savestart; 216 217 savestart = s->str; 218 stopval = *++s->str == '\\' ? backslash(s) : *s->str; 219 if (stopval < s->lastch) { 220 s->str = savestart; 221 return (0); 222 } 223 s->cnt = stopval - s->lastch + 1; 224 s->state = RANGE; 225 --s->lastch; 226 return (1); 227 } 228 229 static void 230 genseq(s) 231 STR *s; 232 { 233 char *ep; 234 235 if (s->which == STRING1) 236 err("sequences only valid in string2"); 237 238 if (*s->str == '\\') 239 s->lastch = backslash(s); 240 else 241 s->lastch = *s->str++; 242 if (*s->str != '*') 243 err("misplaced sequence asterisk"); 244 245 switch (*++s->str) { 246 case '\\': 247 s->cnt = backslash(s); 248 break; 249 case ']': 250 s->cnt = 0; 251 ++s->str; 252 break; 253 default: 254 if (isdigit(*s->str)) { 255 s->cnt = strtol(s->str, &ep, 0); 256 if (*ep == ']') { 257 s->str = ep + 1; 258 break; 259 } 260 } 261 err("illegal sequence count"); 262 /* NOTREACHED */ 263 } 264 265 s->state = s->cnt ? SEQUENCE : INFINITE; 266 } 267 268 /* Use the #defines isXXX() here, DON'T use them above. */ 269 #include <ctype.h> 270 271 /* 272 * Translate \??? into a character. Up to 3 octal digits, if no digits either 273 * an escape code or a literal character. 274 */ 275 static int 276 backslash(s) 277 register STR *s; 278 { 279 register int ch, cnt, val; 280 281 for (cnt = val = 0;;) { 282 ch = *++s->str; 283 if (!isascii(ch) || !isdigit(ch)) 284 break; 285 val = val * 8 + ch - '0'; 286 if (++cnt == 3) 287 break; 288 } 289 if (cnt) 290 return (val); 291 ++s->str; 292 switch (ch) { 293 case 'a': /* escape characters */ 294 return ('\7'); 295 case 'b': 296 return ('\b'); 297 case 'f': 298 return ('\f'); 299 case 'n': 300 return ('\n'); 301 case 'r': 302 return ('\r'); 303 case 't': 304 return ('\t'); 305 case 'v': 306 return ('\13'); 307 case '\0': /* \" -> \ */ 308 s->state = EOS; 309 return ('\\'); 310 default: /* \x" -> x */ 311 return (ch); 312 } 313 } 314