1 /*- 2 * Copyright (c) 1992 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Christos Zoulas of Cornell University. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if !defined(lint) && !defined(SCCSID) 12 static char sccsid[] = "@(#)tokenizer.c 5.2 (Berkeley) 07/03/92"; 13 #endif /* not lint && not SCCSID */ 14 15 /* 16 * tokenize.c: Bourne shell like tokenizer 17 */ 18 #include "sys.h" 19 #include <string.h> 20 #include <stdlib.h> 21 #include "tokenizer.h" 22 23 typedef enum { Q_none, Q_single, Q_double, Q_one, Q_doubleone } quote_t; 24 25 #define IFS "\t \n" 26 27 #define TOK_KEEP 1 28 #define TOK_EAT 2 29 30 #define WINCR 20 31 #define AINCR 10 32 33 #define tok_malloc(a) malloc(a) 34 #define tok_free(a) free(a) 35 #define tok_realloc(a, b) realloc(a, b) 36 37 38 struct tokenizer { 39 char *ifs; /* In field separator */ 40 int argc, amax; /* Current and maximum number of args */ 41 char **argv; /* Argument list */ 42 char *wptr, *wmax; /* Space and limit on the word buffer */ 43 char *wstart; /* Beginning of next word */ 44 char *wspace; /* Space of word buffer */ 45 quote_t quote; /* Quoting state */ 46 int flags; /* flags; */ 47 }; 48 49 50 private void tok_finish __P((Tokenizer *)); 51 52 53 /* tok_finish(): 54 * Finish a word in the tokenizer. 55 */ 56 private void 57 tok_finish(tok) 58 Tokenizer *tok; 59 { 60 *tok->wptr = '\0'; 61 if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) { 62 tok->argv[tok->argc++] = tok->wstart; 63 tok->argv[tok->argc] = NULL; 64 tok->wstart = ++tok->wptr; 65 } 66 tok->flags &= ~TOK_KEEP; 67 } 68 69 70 /* tok_init(): 71 * Initialize the tokenizer 72 */ 73 public Tokenizer * 74 tok_init(ifs) 75 const char *ifs; 76 { 77 Tokenizer* tok = (Tokenizer*) tok_malloc(sizeof(Tokenizer)); 78 79 tok->ifs = strdup(ifs ? ifs : IFS); 80 tok->argc = 0; 81 tok->amax = AINCR; 82 tok->argv = (char **) tok_malloc(sizeof(char *) * tok->amax); 83 tok->argv[0] = NULL; 84 tok->wspace = (char *) tok_malloc(WINCR); 85 tok->wmax = tok->wspace + WINCR; 86 tok->wstart = tok->wspace; 87 tok->wptr = tok->wspace; 88 tok->flags = 0; 89 tok->quote = Q_none; 90 91 return tok; 92 } 93 94 95 /* tok_reset(): 96 * Reset the tokenizer 97 */ 98 public void 99 tok_reset(tok) 100 Tokenizer *tok; 101 { 102 tok->argc = 0; 103 tok->wstart = tok->wspace; 104 tok->wptr = tok->wspace; 105 tok->flags = 0; 106 tok->quote = Q_none; 107 } 108 109 110 /* tok_end(): 111 * Clean up 112 */ 113 public void 114 tok_end(tok) 115 Tokenizer *tok; 116 { 117 tok_free((ptr_t) tok->ifs); 118 tok_free((ptr_t) tok->wspace); 119 tok_free((ptr_t) tok->argv); 120 tok_free((ptr_t) tok); 121 } 122 123 124 125 /* tok_line(): 126 * Bourne shell like tokenizing 127 * Return: 128 * -1: Internal error 129 * 3: Quoted return 130 * 2: Unmatched double quote 131 * 1: Unmatched single quote 132 * 0: Ok 133 */ 134 public int 135 tok_line(tok, line, argc, argv) 136 Tokenizer *tok; 137 const char* line; 138 int *argc; 139 char ***argv; 140 { 141 const char *ptr; 142 143 while (1) { 144 switch (*(ptr = line++)) { 145 case '\'': 146 tok->flags |= TOK_KEEP; 147 tok->flags &= ~TOK_EAT; 148 switch (tok->quote) { 149 case Q_none: 150 tok->quote = Q_single; /* Enter single quote mode */ 151 break; 152 153 case Q_single: /* Exit single quote mode */ 154 tok->quote = Q_none; 155 break; 156 157 case Q_one: /* Quote this ' */ 158 tok->quote = Q_none; 159 *tok->wptr++ = *ptr; 160 break; 161 162 case Q_double: /* Stay in double quote mode */ 163 *tok->wptr++ = *ptr; 164 break; 165 166 case Q_doubleone: /* Quote this ' */ 167 tok->quote = Q_double; 168 *tok->wptr++ = *ptr; 169 break; 170 171 default: 172 return(-1); 173 } 174 break; 175 176 case '"': 177 tok->flags &= ~TOK_EAT; 178 tok->flags |= TOK_KEEP; 179 switch (tok->quote) { 180 case Q_none: /* Enter double quote mode */ 181 tok->quote = Q_double; 182 break; 183 184 case Q_double: 185 tok->quote = Q_none; /* Exit double quote mode */ 186 break; 187 188 case Q_one: /* Quote this " */ 189 tok->quote = Q_none; 190 *tok->wptr++ = *ptr; 191 break; 192 193 case Q_single: /* Stay in single quote mode */ 194 *tok->wptr++ = *ptr; 195 break; 196 197 case Q_doubleone: /* Quote this " */ 198 tok->quote = Q_double; 199 *tok->wptr++ = *ptr; 200 break; 201 202 default: 203 return(-1); 204 } 205 break; 206 207 case '\\': 208 tok->flags |= TOK_KEEP; 209 tok->flags &= ~TOK_EAT; 210 switch (tok->quote) { 211 case Q_none: /* Quote next character */ 212 tok->quote = Q_one; 213 break; 214 215 case Q_double: 216 tok->quote = Q_doubleone;/* Quote next character */ 217 break; 218 219 case Q_one: 220 *tok->wptr++ = *ptr; 221 tok->quote = Q_none; /* Quote this, restore state */ 222 break; 223 224 case Q_single: /* Stay in single quote mode */ 225 *tok->wptr++ = *ptr; 226 break; 227 228 case Q_doubleone: /* Quote this \ */ 229 tok->quote = Q_double; 230 *tok->wptr++ = *ptr; 231 break; 232 233 default: 234 return(-1); 235 } 236 break; 237 238 case '\n': 239 tok->flags &= ~TOK_EAT; 240 switch (tok->quote) { 241 case Q_none: 242 tok_finish(tok); 243 *argv = tok->argv; 244 *argc = tok->argc; 245 return(0); 246 247 case Q_single: 248 case Q_double: 249 *tok->wptr++ = *ptr; /* Add the return */ 250 break; 251 252 case Q_doubleone: 253 tok->flags |= TOK_EAT; 254 tok->quote = Q_double; /* Back to double, eat the '\n' */ 255 break; 256 257 case Q_one: 258 tok->flags |= TOK_EAT; 259 tok->quote = Q_none; /* No quote, more eat the '\n' */ 260 break; 261 262 default: 263 return(0); 264 } 265 break; 266 267 case '\0': 268 switch (tok->quote) { 269 case Q_none: 270 /* Finish word and return */ 271 if (tok->flags & TOK_EAT) { 272 tok->flags &= ~TOK_EAT; 273 return 3; 274 } 275 tok_finish(tok); 276 *argv = tok->argv; 277 *argc = tok->argc; 278 return(0); 279 280 case Q_single: 281 return(1); 282 283 case Q_double: 284 return(2); 285 286 case Q_doubleone: 287 tok->quote = Q_double; 288 *tok->wptr++ = *ptr; 289 break; 290 291 case Q_one: 292 tok->quote = Q_none; 293 *tok->wptr++ = *ptr; 294 break; 295 296 default: 297 return(-1); 298 } 299 break; 300 301 default: 302 tok->flags &= ~TOK_EAT; 303 switch (tok->quote) { 304 case Q_none: 305 if (strchr(tok->ifs, *ptr) != NULL) 306 tok_finish(tok); 307 else 308 *tok->wptr++ = *ptr; 309 break; 310 311 case Q_single: 312 case Q_double: 313 *tok->wptr++ = *ptr; 314 break; 315 316 317 case Q_doubleone: 318 *tok->wptr++ = '\\'; 319 tok->quote = Q_double; 320 *tok->wptr++ = *ptr; 321 break; 322 323 case Q_one: 324 tok->quote = Q_none; 325 *tok->wptr++ = *ptr; 326 break; 327 328 default: 329 return(-1); 330 331 } 332 break; 333 } 334 335 if (tok->wptr >= tok->wmax - 4) { 336 size_t size = tok->wmax - tok->wspace + WINCR; 337 char *s = (char *) tok_realloc(tok->wspace, size); 338 /*SUPPRESS 22*/ 339 int offs = s - tok->wspace; 340 341 if (offs != 0) { 342 int i; 343 for (i = 0; i < tok->argc; i++) 344 tok->argv[i] = tok->argv[i] + offs; 345 tok->wptr = tok->wptr + offs; 346 tok->wstart = tok->wstart + offs; 347 tok->wmax = s + size; 348 tok->wspace = s; 349 } 350 } 351 352 if (tok->argc >= tok->amax - 4) { 353 tok->amax += AINCR; 354 tok->argv = (char **) tok_realloc(tok->argv, 355 tok->amax * sizeof(char*)); 356 } 357 358 } 359 } 360