1 /*
2 * tclParse.c --
3 *
4 * This file contains a collection of procedures that are used
5 * to parse Tcl commands or parts of commands (like quoted
6 * strings or nested sub-commands).
7 *
8 * Copyright (c) 1987-1993 The Regents of the University of California.
9 * Copyright (c) 1994-1996 Sun Microsystems, Inc.
10 *
11 * See the file "license.terms" for information on usage and redistribution
12 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
13 *
14 * SCCS: @(#) tclParse.c 1.51 96/09/06 09:47:29
15 */
16
17 #include "tclInt.h"
18 #include "tclPort.h"
19
20 /*
21 * The following table assigns a type to each character. Only types
22 * meaningful to Tcl parsing are represented here. The table is
23 * designed to be referenced with either signed or unsigned characters,
24 * so it has 384 entries. The first 128 entries correspond to negative
25 * character values, the next 256 correspond to positive character
26 * values. The last 128 entries are identical to the first 128. The
27 * table is always indexed with a 128-byte offset (the 128th entry
28 * corresponds to a 0 character value).
29 */
30
31 char tclTypeTable[] = {
32 /*
33 * Negative character values, from -128 to -1:
34 */
35
36 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
37 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
38 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
39 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
40 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
41 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
42 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
43 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
44 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
45 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
46 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
47 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
48 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
49 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
50 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
51 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
52 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
53 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
54 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
55 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
56 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
57 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
58 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
59 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
60 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
61 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
62 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
63 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
64 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
65 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
66 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
67 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
68
69 /*
70 * Positive character values, from 0-127:
71 */
72
73 TCL_COMMAND_END, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
74 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
75 TCL_NORMAL, TCL_SPACE, TCL_COMMAND_END, TCL_SPACE,
76 TCL_SPACE, TCL_SPACE, TCL_NORMAL, TCL_NORMAL,
77 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
78 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
79 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
80 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
81 TCL_SPACE, TCL_NORMAL, TCL_QUOTE, TCL_NORMAL,
82 TCL_DOLLAR, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
83 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
84 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
85 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
86 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
87 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_COMMAND_END,
88 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
89 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
90 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
91 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
92 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
93 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
94 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
95 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_OPEN_BRACKET,
96 TCL_BACKSLASH, TCL_COMMAND_END, TCL_NORMAL, TCL_NORMAL,
97 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
98 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
99 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
100 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
101 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
102 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
103 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_OPEN_BRACE,
104 TCL_NORMAL, TCL_CLOSE_BRACE, TCL_NORMAL, TCL_NORMAL,
105
106 /*
107 * Large unsigned character values, from 128-255:
108 */
109
110 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
111 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
112 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
113 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
114 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
115 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
116 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
117 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
118 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
119 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
120 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
121 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
122 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
123 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
124 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
125 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
126 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
127 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
128 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
129 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
130 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
131 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
132 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
133 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
134 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
135 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
136 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
137 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
138 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
139 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
140 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
141 TCL_NORMAL, TCL_NORMAL, TCL_NORMAL, TCL_NORMAL,
142 };
143
144 /*
145 * Function prototypes for procedures local to this file:
146 */
147
148 static char * QuoteEnd _ANSI_ARGS_((char *string, int term));
149 static char * ScriptEnd _ANSI_ARGS_((char *p, int nested));
150 static char * VarNameEnd _ANSI_ARGS_((char *string));
151
152 /*
153 *----------------------------------------------------------------------
154 *
155 * Tcl_Backslash --
156 *
157 * Figure out how to handle a backslash sequence.
158 *
159 * Results:
160 * The return value is the character that should be substituted
161 * in place of the backslash sequence that starts at src. If
162 * readPtr isn't NULL then it is filled in with a count of the
163 * number of characters in the backslash sequence.
164 *
165 * Side effects:
166 * None.
167 *
168 *----------------------------------------------------------------------
169 */
170
171 char
Tcl_Backslash(src,readPtr)172 Tcl_Backslash(src, readPtr)
173 char *src; /* Points to the backslash character of
174 * a backslash sequence. */
175 int *readPtr; /* Fill in with number of characters read
176 * from src, unless NULL. */
177 {
178 register char *p = src+1;
179 char result;
180 int count;
181
182 count = 2;
183
184 switch (*p) {
185 /*
186 * Note: in the conversions below, use absolute values (e.g.,
187 * 0xa) rather than symbolic values (e.g. \n) that get converted
188 * by the compiler. It's possible that compilers on some
189 * platforms will do the symbolic conversions differently, which
190 * could result in non-portable Tcl scripts.
191 */
192
193 case 'a':
194 result = 0x7;
195 break;
196 case 'b':
197 result = 0x8;
198 break;
199 case 'f':
200 result = 0xc;
201 break;
202 case 'n':
203 result = 0xa;
204 break;
205 case 'r':
206 result = 0xd;
207 break;
208 case 't':
209 result = 0x9;
210 break;
211 case 'v':
212 result = 0xb;
213 break;
214 case 'x':
215 if (isxdigit(UCHAR(p[1]))) {
216 char *end;
217
218 result = (char) strtoul(p+1, &end, 16);
219 count = end - src;
220 } else {
221 count = 2;
222 result = 'x';
223 }
224 break;
225 case '\n':
226 do {
227 p++;
228 } while ((*p == ' ') || (*p == '\t'));
229 result = ' ';
230 count = p - src;
231 break;
232 case 0:
233 result = '\\';
234 count = 1;
235 break;
236 default:
237 if (isdigit(UCHAR(*p))) {
238 result = (char)(*p - '0');
239 p++;
240 if (!isdigit(UCHAR(*p))) {
241 break;
242 }
243 count = 3;
244 result = (char)((result << 3) + (*p - '0'));
245 p++;
246 if (!isdigit(UCHAR(*p))) {
247 break;
248 }
249 count = 4;
250 result = (char)((result << 3) + (*p - '0'));
251 break;
252 }
253 result = *p;
254 count = 2;
255 break;
256 }
257
258 if (readPtr != NULL) {
259 *readPtr = count;
260 }
261 return result;
262 }
263
264 /*
265 *--------------------------------------------------------------
266 *
267 * TclParseQuotes --
268 *
269 * This procedure parses a double-quoted string such as a
270 * quoted Tcl command argument or a quoted value in a Tcl
271 * expression. This procedure is also used to parse array
272 * element names within parentheses, or anything else that
273 * needs all the substitutions that happen in quotes.
274 *
275 * Results:
276 * The return value is a standard Tcl result, which is
277 * TCL_OK unless there was an error while parsing the
278 * quoted string. If an error occurs then interp->result
279 * contains a standard error message. *TermPtr is filled
280 * in with the address of the character just after the
281 * last one successfully processed; this is usually the
282 * character just after the matching close-quote. The
283 * fully-substituted contents of the quotes are stored in
284 * standard fashion in *pvPtr, null-terminated with
285 * pvPtr->next pointing to the terminating null character.
286 *
287 * Side effects:
288 * The buffer space in pvPtr may be enlarged by calling its
289 * expandProc.
290 *
291 *--------------------------------------------------------------
292 */
293
294 int
TclParseQuotes(interp,string,termChar,flags,termPtr,pvPtr)295 TclParseQuotes(interp, string, termChar, flags, termPtr, pvPtr)
296 Tcl_Interp *interp; /* Interpreter to use for nested command
297 * evaluations and error messages. */
298 char *string; /* Character just after opening double-
299 * quote. */
300 int termChar; /* Character that terminates "quoted" string
301 * (usually double-quote, but sometimes
302 * right-paren or something else). */
303 int flags; /* Flags to pass to nested Tcl_Eval calls. */
304 char **termPtr; /* Store address of terminating character
305 * here. */
306 ParseValue *pvPtr; /* Information about where to place
307 * fully-substituted result of parse. */
308 {
309 register char *src, *dst, c;
310
311 src = string;
312 dst = pvPtr->next;
313
314 while (1) {
315 if (dst == pvPtr->end) {
316 /*
317 * Target buffer space is about to run out. Make more space.
318 */
319
320 pvPtr->next = dst;
321 (*pvPtr->expandProc)(pvPtr, 1);
322 dst = pvPtr->next;
323 }
324
325 c = *src;
326 src++;
327 if (c == termChar) {
328 *dst = '\0';
329 pvPtr->next = dst;
330 *termPtr = src;
331 return TCL_OK;
332 } else if (CHAR_TYPE(c) == TCL_NORMAL) {
333 copy:
334 *dst = c;
335 dst++;
336 continue;
337 } else if (c == '$') {
338 int length;
339 char *value;
340
341 value = Tcl_ParseVar(interp, src-1, termPtr);
342 if (value == NULL) {
343 return TCL_ERROR;
344 }
345 src = *termPtr;
346 length = strlen(value);
347 if ((pvPtr->end - dst) <= length) {
348 pvPtr->next = dst;
349 (*pvPtr->expandProc)(pvPtr, length);
350 dst = pvPtr->next;
351 }
352 strcpy(dst, value);
353 dst += length;
354 continue;
355 } else if (c == '[') {
356 int result;
357
358 pvPtr->next = dst;
359 result = TclParseNestedCmd(interp, src, flags, termPtr, pvPtr);
360 if (result != TCL_OK) {
361 return result;
362 }
363 src = *termPtr;
364 dst = pvPtr->next;
365 continue;
366 } else if (c == '\\') {
367 int numRead;
368
369 src--;
370 *dst = Tcl_Backslash(src, &numRead);
371 dst++;
372 src += numRead;
373 continue;
374 } else if (c == '\0') {
375 Tcl_ResetResult(interp);
376 sprintf(interp->result, "missing %c", termChar);
377 *termPtr = string-1;
378 return TCL_ERROR;
379 } else {
380 goto copy;
381 }
382 }
383 }
384
385 /*
386 *--------------------------------------------------------------
387 *
388 * TclParseNestedCmd --
389 *
390 * This procedure parses a nested Tcl command between
391 * brackets, returning the result of the command.
392 *
393 * Results:
394 * The return value is a standard Tcl result, which is
395 * TCL_OK unless there was an error while executing the
396 * nested command. If an error occurs then interp->result
397 * contains a standard error message. *TermPtr is filled
398 * in with the address of the character just after the
399 * last one processed; this is usually the character just
400 * after the matching close-bracket, or the null character
401 * at the end of the string if the close-bracket was missing
402 * (a missing close bracket is an error). The result returned
403 * by the command is stored in standard fashion in *pvPtr,
404 * null-terminated, with pvPtr->next pointing to the null
405 * character.
406 *
407 * Side effects:
408 * The storage space at *pvPtr may be expanded.
409 *
410 *--------------------------------------------------------------
411 */
412
413 int
TclParseNestedCmd(interp,string,flags,termPtr,pvPtr)414 TclParseNestedCmd(interp, string, flags, termPtr, pvPtr)
415 Tcl_Interp *interp; /* Interpreter to use for nested command
416 * evaluations and error messages. */
417 char *string; /* Character just after opening bracket. */
418 int flags; /* Flags to pass to nested Tcl_Eval. */
419 char **termPtr; /* Store address of terminating character
420 * here. */
421 register ParseValue *pvPtr; /* Information about where to place
422 * result of command. */
423 {
424 int result, length, shortfall;
425 Interp *iPtr = (Interp *) interp;
426
427 iPtr->evalFlags = flags | TCL_BRACKET_TERM;
428 result = Tcl_Eval(interp, string);
429 *termPtr = iPtr->termPtr;
430 if (result != TCL_OK) {
431 /*
432 * The increment below results in slightly cleaner message in
433 * the errorInfo variable (the close-bracket will appear).
434 */
435
436 if (**termPtr == ']') {
437 *termPtr += 1;
438 }
439 return result;
440 }
441 (*termPtr) += 1;
442 length = strlen(iPtr->result);
443 shortfall = length + 1 - (pvPtr->end - pvPtr->next);
444 if (shortfall > 0) {
445 (*pvPtr->expandProc)(pvPtr, shortfall);
446 }
447 strcpy(pvPtr->next, iPtr->result);
448 pvPtr->next += length;
449 Tcl_FreeResult(iPtr);
450 iPtr->result = iPtr->resultSpace;
451 iPtr->resultSpace[0] = '\0';
452 return TCL_OK;
453 }
454
455 /*
456 *--------------------------------------------------------------
457 *
458 * TclParseBraces --
459 *
460 * This procedure scans the information between matching
461 * curly braces.
462 *
463 * Results:
464 * The return value is a standard Tcl result, which is
465 * TCL_OK unless there was an error while parsing string.
466 * If an error occurs then interp->result contains a
467 * standard error message. *TermPtr is filled
468 * in with the address of the character just after the
469 * last one successfully processed; this is usually the
470 * character just after the matching close-brace. The
471 * information between curly braces is stored in standard
472 * fashion in *pvPtr, null-terminated with pvPtr->next
473 * pointing to the terminating null character.
474 *
475 * Side effects:
476 * The storage space at *pvPtr may be expanded.
477 *
478 *--------------------------------------------------------------
479 */
480
481 int
TclParseBraces(interp,string,termPtr,pvPtr)482 TclParseBraces(interp, string, termPtr, pvPtr)
483 Tcl_Interp *interp; /* Interpreter to use for nested command
484 * evaluations and error messages. */
485 char *string; /* Character just after opening bracket. */
486 char **termPtr; /* Store address of terminating character
487 * here. */
488 register ParseValue *pvPtr; /* Information about where to place
489 * result of command. */
490 {
491 int level;
492 register char *src, *dst, *end;
493 register char c;
494
495 src = string;
496 dst = pvPtr->next;
497 end = pvPtr->end;
498 level = 1;
499
500 /*
501 * Copy the characters one at a time to the result area, stopping
502 * when the matching close-brace is found.
503 */
504
505 while (1) {
506 c = *src;
507 src++;
508 if (dst == end) {
509 pvPtr->next = dst;
510 (*pvPtr->expandProc)(pvPtr, 20);
511 dst = pvPtr->next;
512 end = pvPtr->end;
513 }
514 *dst = c;
515 dst++;
516 if (CHAR_TYPE(c) == TCL_NORMAL) {
517 continue;
518 } else if (c == '{') {
519 level++;
520 } else if (c == '}') {
521 level--;
522 if (level == 0) {
523 dst--; /* Don't copy the last close brace. */
524 break;
525 }
526 } else if (c == '\\') {
527 int count;
528
529 /*
530 * Must always squish out backslash-newlines, even when in
531 * braces. This is needed so that this sequence can appear
532 * anywhere in a command, such as the middle of an expression.
533 */
534
535 if (*src == '\n') {
536 dst[-1] = Tcl_Backslash(src-1, &count);
537 src += count - 1;
538 } else {
539 (void) Tcl_Backslash(src-1, &count);
540 while (count > 1) {
541 if (dst == end) {
542 pvPtr->next = dst;
543 (*pvPtr->expandProc)(pvPtr, 20);
544 dst = pvPtr->next;
545 end = pvPtr->end;
546 }
547 *dst = *src;
548 dst++;
549 src++;
550 count--;
551 }
552 }
553 } else if (c == '\0') {
554 Tcl_SetResult(interp, "missing close-brace", TCL_STATIC);
555 *termPtr = string-1;
556 return TCL_ERROR;
557 }
558 }
559
560 *dst = '\0';
561 pvPtr->next = dst;
562 *termPtr = src;
563 return TCL_OK;
564 }
565
566 /*
567 *--------------------------------------------------------------
568 *
569 * TclParseWords --
570 *
571 * This procedure parses one or more words from a command
572 * string and creates argv-style pointers to fully-substituted
573 * copies of those words.
574 *
575 * Results:
576 * The return value is a standard Tcl result.
577 *
578 * *argcPtr is modified to hold a count of the number of words
579 * successfully parsed, which may be 0. At most maxWords words
580 * will be parsed. If 0 <= *argcPtr < maxWords then it
581 * means that a command separator was seen. If *argcPtr
582 * is maxWords then it means that a command separator was
583 * not seen yet.
584 *
585 * *TermPtr is filled in with the address of the character
586 * just after the last one successfully processed in the
587 * last word. This is either the command terminator (if
588 * *argcPtr < maxWords), the character just after the last
589 * one in a word (if *argcPtr is maxWords), or the vicinity
590 * of an error (if the result is not TCL_OK).
591 *
592 * The pointers at *argv are filled in with pointers to the
593 * fully-substituted words, and the actual contents of the
594 * words are copied to the buffer at pvPtr.
595 *
596 * If an error occurrs then an error message is left in
597 * interp->result and the information at *argv, *argcPtr,
598 * and *pvPtr may be incomplete.
599 *
600 * Side effects:
601 * The buffer space in pvPtr may be enlarged by calling its
602 * expandProc.
603 *
604 *--------------------------------------------------------------
605 */
606
607 int
TclParseWords(interp,string,flags,maxWords,termPtr,argcPtr,argv,pvPtr)608 TclParseWords(interp, string, flags, maxWords, termPtr, argcPtr, argv, pvPtr)
609 Tcl_Interp *interp; /* Interpreter to use for nested command
610 * evaluations and error messages. */
611 char *string; /* First character of word. */
612 int flags; /* Flags to control parsing (same values as
613 * passed to Tcl_Eval). */
614 int maxWords; /* Maximum number of words to parse. */
615 char **termPtr; /* Store address of terminating character
616 * here. */
617 int *argcPtr; /* Filled in with actual number of words
618 * parsed. */
619 char **argv; /* Store addresses of individual words here. */
620 register ParseValue *pvPtr; /* Information about where to place
621 * fully-substituted word. */
622 {
623 register char *src, *dst;
624 register char c;
625 int type, result, argc;
626 char *oldBuffer; /* Used to detect when pvPtr's buffer gets
627 * reallocated, so we can adjust all of the
628 * argv pointers. */
629
630 src = string;
631 oldBuffer = pvPtr->buffer;
632 dst = pvPtr->next;
633 for (argc = 0; argc < maxWords; argc++) {
634 argv[argc] = dst;
635
636 /*
637 * Skip leading space.
638 */
639
640 skipSpace:
641 c = *src;
642 type = CHAR_TYPE(c);
643 while (type == TCL_SPACE) {
644 src++;
645 c = *src;
646 type = CHAR_TYPE(c);
647 }
648
649 /*
650 * Handle the normal case (i.e. no leading double-quote or brace).
651 */
652
653 if (type == TCL_NORMAL) {
654 normalArg:
655 while (1) {
656 if (dst == pvPtr->end) {
657 /*
658 * Target buffer space is about to run out. Make
659 * more space.
660 */
661
662 pvPtr->next = dst;
663 (*pvPtr->expandProc)(pvPtr, 1);
664 dst = pvPtr->next;
665 }
666
667 if (type == TCL_NORMAL) {
668 copy:
669 *dst = c;
670 dst++;
671 src++;
672 } else if (type == TCL_SPACE) {
673 goto wordEnd;
674 } else if (type == TCL_DOLLAR) {
675 int length;
676 char *value;
677
678 value = Tcl_ParseVar(interp, src, termPtr);
679 if (value == NULL) {
680 return TCL_ERROR;
681 }
682 src = *termPtr;
683 length = strlen(value);
684 if ((pvPtr->end - dst) <= length) {
685 pvPtr->next = dst;
686 (*pvPtr->expandProc)(pvPtr, length);
687 dst = pvPtr->next;
688 }
689 strcpy(dst, value);
690 dst += length;
691 } else if (type == TCL_COMMAND_END) {
692 if ((c == ']') && !(flags & TCL_BRACKET_TERM)) {
693 goto copy;
694 }
695
696 /*
697 * End of command; simulate a word-end first, so
698 * that the end-of-command can be processed as the
699 * first thing in a new word.
700 */
701
702 goto wordEnd;
703 } else if (type == TCL_OPEN_BRACKET) {
704 pvPtr->next = dst;
705 result = TclParseNestedCmd(interp, src+1, flags, termPtr,
706 pvPtr);
707 if (result != TCL_OK) {
708 return result;
709 }
710 src = *termPtr;
711 dst = pvPtr->next;
712 } else if (type == TCL_BACKSLASH) {
713 int numRead;
714
715 *dst = Tcl_Backslash(src, &numRead);
716
717 /*
718 * The following special check allows a backslash-newline
719 * to be treated as a word-separator, as if the backslash
720 * and newline had been collapsed before command parsing
721 * began.
722 */
723
724 if (src[1] == '\n') {
725 src += numRead;
726 goto wordEnd;
727 }
728 src += numRead;
729 dst++;
730 } else {
731 goto copy;
732 }
733 c = *src;
734 type = CHAR_TYPE(c);
735 }
736 } else {
737
738 /*
739 * Check for the end of the command.
740 */
741
742 if (type == TCL_COMMAND_END) {
743 if (flags & TCL_BRACKET_TERM) {
744 if (c == '\0') {
745 Tcl_SetResult(interp, "missing close-bracket",
746 TCL_STATIC);
747 return TCL_ERROR;
748 }
749 } else {
750 if (c == ']') {
751 goto normalArg;
752 }
753 }
754 goto done;
755 }
756
757 /*
758 * Now handle the special cases: open braces, double-quotes,
759 * and backslash-newline.
760 */
761
762 pvPtr->next = dst;
763 if (type == TCL_QUOTE) {
764 result = TclParseQuotes(interp, src+1, '"', flags,
765 termPtr, pvPtr);
766 } else if (type == TCL_OPEN_BRACE) {
767 result = TclParseBraces(interp, src+1, termPtr, pvPtr);
768 } else if ((type == TCL_BACKSLASH) && (src[1] == '\n')) {
769 /*
770 * This code is needed so that a backslash-newline at the
771 * very beginning of a word is treated as part of the white
772 * space between words and not as a space within the word.
773 */
774
775 src += 2;
776 goto skipSpace;
777 } else {
778 goto normalArg;
779 }
780 if (result != TCL_OK) {
781 return result;
782 }
783
784 /*
785 * Back from quotes or braces; make sure that the terminating
786 * character was the end of the word.
787 */
788
789 c = **termPtr;
790 if ((c == '\\') && ((*termPtr)[1] == '\n')) {
791 /*
792 * Line is continued on next line; the backslash-newline
793 * sequence turns into space, which is OK. No need to do
794 * anything here.
795 */
796 } else {
797 type = CHAR_TYPE(c);
798 if ((type != TCL_SPACE) && (type != TCL_COMMAND_END)) {
799 if (*src == '"') {
800 Tcl_SetResult(interp,
801 "extra characters after close-quote",
802 TCL_STATIC);
803 } else {
804 Tcl_SetResult(interp,
805 "extra characters after close-brace",
806 TCL_STATIC);
807 }
808 return TCL_ERROR;
809 }
810 }
811 src = *termPtr;
812 dst = pvPtr->next;
813 }
814
815 /*
816 * We're at the end of a word, so add a null terminator. Then
817 * see if the buffer was re-allocated during this word. If so,
818 * update all of the argv pointers.
819 */
820
821 wordEnd:
822 *dst = '\0';
823 dst++;
824 if (oldBuffer != pvPtr->buffer) {
825 int i;
826
827 for (i = 0; i <= argc; i++) {
828 argv[i] = pvPtr->buffer + (argv[i] - oldBuffer);
829 }
830 oldBuffer = pvPtr->buffer;
831 }
832 }
833
834 done:
835 pvPtr->next = dst;
836 *termPtr = src;
837 *argcPtr = argc;
838 return TCL_OK;
839 }
840
841 /*
842 *--------------------------------------------------------------
843 *
844 * TclExpandParseValue --
845 *
846 * This procedure is commonly used as the value of the
847 * expandProc in a ParseValue. It uses malloc to allocate
848 * more space for the result of a parse.
849 *
850 * Results:
851 * The buffer space in *pvPtr is reallocated to something
852 * larger, and if pvPtr->clientData is non-zero the old
853 * buffer is freed. Information is copied from the old
854 * buffer to the new one.
855 *
856 * Side effects:
857 * None.
858 *
859 *--------------------------------------------------------------
860 */
861
862 void
TclExpandParseValue(pvPtr,needed)863 TclExpandParseValue(pvPtr, needed)
864 register ParseValue *pvPtr; /* Information about buffer that
865 * must be expanded. If the clientData
866 * in the structure is non-zero, it
867 * means that the current buffer is
868 * dynamically allocated. */
869 int needed; /* Minimum amount of additional space
870 * to allocate. */
871 {
872 int newSpace;
873 char *new;
874
875 /*
876 * Either double the size of the buffer or add enough new space
877 * to meet the demand, whichever produces a larger new buffer.
878 */
879
880 newSpace = (pvPtr->end - pvPtr->buffer) + 1;
881 if (newSpace < needed) {
882 newSpace += needed;
883 } else {
884 newSpace += newSpace;
885 }
886 new = (char *) ckalloc((unsigned) newSpace);
887
888 /*
889 * Copy from old buffer to new, free old buffer if needed, and
890 * mark new buffer as malloc-ed.
891 */
892
893 memcpy((VOID *) new, (VOID *) pvPtr->buffer,
894 (size_t) (pvPtr->next - pvPtr->buffer));
895 pvPtr->next = new + (pvPtr->next - pvPtr->buffer);
896 if (pvPtr->clientData != 0) {
897 ckfree(pvPtr->buffer);
898 }
899 pvPtr->buffer = new;
900 pvPtr->end = new + newSpace - 1;
901 pvPtr->clientData = (ClientData) 1;
902 }
903
904 /*
905 *----------------------------------------------------------------------
906 *
907 * TclWordEnd --
908 *
909 * Given a pointer into a Tcl command, find the end of the next
910 * word of the command.
911 *
912 * Results:
913 * The return value is a pointer to the last character that's part
914 * of the word pointed to by "start". If the word doesn't end
915 * properly within the string then the return value is the address
916 * of the null character at the end of the string.
917 *
918 * Side effects:
919 * None.
920 *
921 *----------------------------------------------------------------------
922 */
923
924 char *
TclWordEnd(start,nested,semiPtr)925 TclWordEnd(start, nested, semiPtr)
926 char *start; /* Beginning of a word of a Tcl command. */
927 int nested; /* Zero means this is a top-level command.
928 * One means this is a nested command (close
929 * bracket is a word terminator). */
930 int *semiPtr; /* Set to 1 if word ends with a command-
931 * terminating semi-colon, zero otherwise.
932 * If NULL then ignored. */
933 {
934 register char *p;
935 int count;
936
937 if (semiPtr != NULL) {
938 *semiPtr = 0;
939 }
940
941 /*
942 * Skip leading white space (backslash-newline must be treated like
943 * white-space, except that it better not be the last thing in the
944 * command).
945 */
946
947 for (p = start; ; p++) {
948 if (isspace(UCHAR(*p))) {
949 continue;
950 }
951 if ((p[0] == '\\') && (p[1] == '\n')) {
952 if (p[2] == 0) {
953 return p+2;
954 }
955 continue;
956 }
957 break;
958 }
959
960 /*
961 * Handle words beginning with a double-quote or a brace.
962 */
963
964 if (*p == '"') {
965 p = QuoteEnd(p+1, '"');
966 if (*p == 0) {
967 return p;
968 }
969 p++;
970 } else if (*p == '{') {
971 int braces = 1;
972 while (braces != 0) {
973 p++;
974 while (*p == '\\') {
975 (void) Tcl_Backslash(p, &count);
976 p += count;
977 }
978 if (*p == '}') {
979 braces--;
980 } else if (*p == '{') {
981 braces++;
982 } else if (*p == 0) {
983 return p;
984 }
985 }
986 p++;
987 }
988
989 /*
990 * Handle words that don't start with a brace or double-quote.
991 * This code is also invoked if the word starts with a brace or
992 * double-quote and there is garbage after the closing brace or
993 * quote. This is an error as far as Tcl_Eval is concerned, but
994 * for here the garbage is treated as part of the word.
995 */
996
997 while (1) {
998 if (*p == '[') {
999 p = ScriptEnd(p+1, 1);
1000 if (*p == 0) {
1001 return p;
1002 }
1003 p++;
1004 } else if (*p == '\\') {
1005 if (p[1] == '\n') {
1006 /*
1007 * Backslash-newline: it maps to a space character
1008 * that is a word separator, so the word ends just before
1009 * the backslash.
1010 */
1011
1012 return p-1;
1013 }
1014 (void) Tcl_Backslash(p, &count);
1015 p += count;
1016 } else if (*p == '$') {
1017 p = VarNameEnd(p);
1018 if (*p == 0) {
1019 return p;
1020 }
1021 p++;
1022 } else if (*p == ';') {
1023 /*
1024 * Include the semi-colon in the word that is returned.
1025 */
1026
1027 if (semiPtr != NULL) {
1028 *semiPtr = 1;
1029 }
1030 return p;
1031 } else if (isspace(UCHAR(*p))) {
1032 return p-1;
1033 } else if ((*p == ']') && nested) {
1034 return p-1;
1035 } else if (*p == 0) {
1036 if (nested) {
1037 /*
1038 * Nested commands can't end because of the end of the
1039 * string.
1040 */
1041 return p;
1042 }
1043 return p-1;
1044 } else {
1045 p++;
1046 }
1047 }
1048 }
1049
1050 /*
1051 *----------------------------------------------------------------------
1052 *
1053 * QuoteEnd --
1054 *
1055 * Given a pointer to a string that obeys the parsing conventions
1056 * for quoted things in Tcl, find the end of that quoted thing.
1057 * The actual thing may be a quoted argument or a parenthesized
1058 * index name.
1059 *
1060 * Results:
1061 * The return value is a pointer to the last character that is
1062 * part of the quoted string (i.e the character that's equal to
1063 * term). If the quoted string doesn't terminate properly then
1064 * the return value is a pointer to the null character at the
1065 * end of the string.
1066 *
1067 * Side effects:
1068 * None.
1069 *
1070 *----------------------------------------------------------------------
1071 */
1072
1073 static char *
QuoteEnd(string,term)1074 QuoteEnd(string, term)
1075 char *string; /* Pointer to character just after opening
1076 * "quote". */
1077 int term; /* This character will terminate the
1078 * quoted string (e.g. '"' or ')'). */
1079 {
1080 register char *p = string;
1081 int count;
1082
1083 while (*p != term) {
1084 if (*p == '\\') {
1085 (void) Tcl_Backslash(p, &count);
1086 p += count;
1087 } else if (*p == '[') {
1088 for (p++; *p != ']'; p++) {
1089 p = TclWordEnd(p, 1, (int *) NULL);
1090 if (*p == 0) {
1091 return p;
1092 }
1093 }
1094 p++;
1095 } else if (*p == '$') {
1096 p = VarNameEnd(p);
1097 if (*p == 0) {
1098 return p;
1099 }
1100 p++;
1101 } else if (*p == 0) {
1102 return p;
1103 } else {
1104 p++;
1105 }
1106 }
1107 return p-1;
1108 }
1109
1110 /*
1111 *----------------------------------------------------------------------
1112 *
1113 * VarNameEnd --
1114 *
1115 * Given a pointer to a variable reference using $-notation, find
1116 * the end of the variable name spec.
1117 *
1118 * Results:
1119 * The return value is a pointer to the last character that
1120 * is part of the variable name. If the variable name doesn't
1121 * terminate properly then the return value is a pointer to the
1122 * null character at the end of the string.
1123 *
1124 * Side effects:
1125 * None.
1126 *
1127 *----------------------------------------------------------------------
1128 */
1129
1130 static char *
VarNameEnd(string)1131 VarNameEnd(string)
1132 char *string; /* Pointer to dollar-sign character. */
1133 {
1134 register char *p = string+1;
1135
1136 if (*p == '{') {
1137 for (p++; (*p != '}') && (*p != 0); p++) {
1138 /* Empty loop body. */
1139 }
1140 return p;
1141 }
1142 while (isalnum(UCHAR(*p)) || (*p == '_')) {
1143 p++;
1144 }
1145 if ((*p == '(') && (p != string+1)) {
1146 return QuoteEnd(p+1, ')');
1147 }
1148 return p-1;
1149 }
1150
1151
1152 /*
1153 *----------------------------------------------------------------------
1154 *
1155 * ScriptEnd --
1156 *
1157 * Given a pointer to the beginning of a Tcl script, find the end of
1158 * the script.
1159 *
1160 * Results:
1161 * The return value is a pointer to the last character that's part
1162 * of the script pointed to by "p". If the command doesn't end
1163 * properly within the string then the return value is the address
1164 * of the null character at the end of the string.
1165 *
1166 * Side effects:
1167 * None.
1168 *
1169 *----------------------------------------------------------------------
1170 */
1171
1172 static char *
ScriptEnd(p,nested)1173 ScriptEnd(p, nested)
1174 char *p; /* Script to check. */
1175 int nested; /* Zero means this is a top-level command.
1176 * One means this is a nested command (the
1177 * last character of the script must be
1178 * an unquoted ]). */
1179 {
1180 int commentOK = 1;
1181 int length;
1182
1183 while (1) {
1184 while (isspace(UCHAR(*p))) {
1185 if (*p == '\n') {
1186 commentOK = 1;
1187 }
1188 p++;
1189 }
1190 if ((*p == '#') && commentOK) {
1191 do {
1192 if (*p == '\\') {
1193 /*
1194 * If the script ends with backslash-newline, then
1195 * this command isn't complete.
1196 */
1197
1198 if ((p[1] == '\n') && (p[2] == 0)) {
1199 return p+2;
1200 }
1201 Tcl_Backslash(p, &length);
1202 p += length;
1203 } else {
1204 p++;
1205 }
1206 } while ((*p != 0) && (*p != '\n'));
1207 continue;
1208 }
1209 p = TclWordEnd(p, nested, &commentOK);
1210 if (*p == 0) {
1211 return p;
1212 }
1213 p++;
1214 if (nested) {
1215 if (*p == ']') {
1216 return p;
1217 }
1218 } else {
1219 if (*p == 0) {
1220 return p-1;
1221 }
1222 }
1223 }
1224 }
1225
1226 /*
1227 *----------------------------------------------------------------------
1228 *
1229 * Tcl_ParseVar --
1230 *
1231 * Given a string starting with a $ sign, parse off a variable
1232 * name and return its value.
1233 *
1234 * Results:
1235 * The return value is the contents of the variable given by
1236 * the leading characters of string. If termPtr isn't NULL,
1237 * *termPtr gets filled in with the address of the character
1238 * just after the last one in the variable specifier. If the
1239 * variable doesn't exist, then the return value is NULL and
1240 * an error message will be left in interp->result.
1241 *
1242 * Side effects:
1243 * None.
1244 *
1245 *----------------------------------------------------------------------
1246 */
1247
1248 char *
Tcl_ParseVar(interp,string,termPtr)1249 Tcl_ParseVar(interp, string, termPtr)
1250 Tcl_Interp *interp; /* Context for looking up variable. */
1251 register char *string; /* String containing variable name.
1252 * First character must be "$". */
1253 char **termPtr; /* If non-NULL, points to word to fill
1254 * in with character just after last
1255 * one in the variable specifier. */
1256
1257 {
1258 char *name1, *name1End, c, *result;
1259 register char *name2;
1260 #define NUM_CHARS 200
1261 char copyStorage[NUM_CHARS];
1262 ParseValue pv;
1263
1264 /*
1265 * There are three cases:
1266 * 1. The $ sign is followed by an open curly brace. Then the variable
1267 * name is everything up to the next close curly brace, and the
1268 * variable is a scalar variable.
1269 * 2. The $ sign is not followed by an open curly brace. Then the
1270 * variable name is everything up to the next character that isn't
1271 * a letter, digit, or underscore. If the following character is an
1272 * open parenthesis, then the information between parentheses is
1273 * the array element name, which can include any of the substitutions
1274 * permissible between quotes.
1275 * 3. The $ sign is followed by something that isn't a letter, digit,
1276 * or underscore: in this case, there is no variable name, and "$"
1277 * is returned.
1278 */
1279
1280 name2 = NULL;
1281 string++;
1282 if (*string == '{') {
1283 string++;
1284 name1 = string;
1285 while (*string != '}') {
1286 if (*string == 0) {
1287 Tcl_SetResult(interp, "missing close-brace for variable name",
1288 TCL_STATIC);
1289 if (termPtr != 0) {
1290 *termPtr = string;
1291 }
1292 return NULL;
1293 }
1294 string++;
1295 }
1296 name1End = string;
1297 string++;
1298 } else {
1299 name1 = string;
1300 while (isalnum(UCHAR(*string)) || (*string == '_')) {
1301 string++;
1302 }
1303 if (string == name1) {
1304 if (termPtr != 0) {
1305 *termPtr = string;
1306 }
1307 return "$";
1308 }
1309 name1End = string;
1310 if (*string == '(') {
1311 char *end;
1312
1313 /*
1314 * Perform substitutions on the array element name, just as
1315 * is done for quotes.
1316 */
1317
1318 pv.buffer = pv.next = copyStorage;
1319 pv.end = copyStorage + NUM_CHARS - 1;
1320 pv.expandProc = TclExpandParseValue;
1321 pv.clientData = (ClientData) NULL;
1322 if (TclParseQuotes(interp, string+1, ')', 0, &end, &pv)
1323 != TCL_OK) {
1324 char msg[200];
1325 int length;
1326
1327 length = string-name1;
1328 if (length > 100) {
1329 length = 100;
1330 }
1331 sprintf(msg, "\n (parsing index for array \"%.*s\")",
1332 length, name1);
1333 Tcl_AddErrorInfo(interp, msg);
1334 result = NULL;
1335 name2 = pv.buffer;
1336 if (termPtr != 0) {
1337 *termPtr = end;
1338 }
1339 goto done;
1340 }
1341 Tcl_ResetResult(interp);
1342 string = end;
1343 name2 = pv.buffer;
1344 }
1345 }
1346 if (termPtr != 0) {
1347 *termPtr = string;
1348 }
1349
1350 if (((Interp *) interp)->noEval) {
1351 return "";
1352 }
1353 c = *name1End;
1354 *name1End = 0;
1355 result = Tcl_GetVar2(interp, name1, name2, TCL_LEAVE_ERR_MSG);
1356 *name1End = c;
1357
1358 done:
1359 if ((name2 != NULL) && (pv.buffer != copyStorage)) {
1360 ckfree(pv.buffer);
1361 }
1362 return result;
1363 }
1364
1365 /*
1366 *----------------------------------------------------------------------
1367 *
1368 * Tcl_CommandComplete --
1369 *
1370 * Given a partial or complete Tcl command, this procedure
1371 * determines whether the command is complete in the sense
1372 * of having matched braces and quotes and brackets.
1373 *
1374 * Results:
1375 * 1 is returned if the command is complete, 0 otherwise.
1376 *
1377 * Side effects:
1378 * None.
1379 *
1380 *----------------------------------------------------------------------
1381 */
1382
1383 int
Tcl_CommandComplete(cmd)1384 Tcl_CommandComplete(cmd)
1385 char *cmd; /* Command to check. */
1386 {
1387 char *p;
1388
1389 if (*cmd == 0) {
1390 return 1;
1391 }
1392 p = ScriptEnd(cmd, 0);
1393 return (*p != 0);
1394 }
1395