1 /* ====================================================================
2 * Copyright 1999 J. David Lowe. All rights reserved.
3 *
4 * tokens.c
5 *
6 * Functions for manipulating a token list.
7 *
8 * ==================================================================== */
9
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <ctype.h>
14
15 #include <template.h>
16
17
18
19 /* ====================================================================
20 * NAME: token_group_init
21 *
22 * DESCRIPTION: Initializes and returns a pointer to a new token_group
23 * structure.
24 *
25 * RETURN VALUES: Returns NULL if the memory allocation fails; otherwise
26 * returns a pointer to a token_group structure.
27 *
28 * BUGS: Hopefully none.
29 * ==================================================================== */
30 token_group_p
token_group_init(void)31 token_group_init(void)
32 {
33 token_group_p tgroup;
34
35 tgroup = (token_group_p)malloc(sizeof(token_group));
36 if (tgroup == NULL)
37 {
38 template_errno = TMPL_EMALLOC;
39 return NULL;
40 }
41
42 tgroup->tokens = NULL;
43 tgroup->max_token = -1;
44 tgroup->first = 0;
45 tgroup->last = 0;
46 tgroup->current = 0;
47
48 return(tgroup);
49 }
50
51
52
53 /* ====================================================================
54 * NAME: token_subgroup_init
55 *
56 * DESCRIPTION: Initializes and returns a pointer to a new token_group
57 * structure, "inherited" from an existing token_group.
58 *
59 * RETURN VALUES: Returns NULL if the memory allocation fails; otherwise
60 * returns a pointer to a token_group structure.
61 *
62 * BUGS: Hopefully none.
63 * ==================================================================== */
64 token_group_p
token_subgroup_init(token_group_p tgroup,unsigned int first,unsigned int last)65 token_subgroup_init(token_group_p tgroup, unsigned int first,
66 unsigned int last)
67 {
68 token_group_p new;
69
70 if (tgroup == NULL)
71 {
72 template_errno = TMPL_ENULLARG;
73 return NULL;
74 }
75
76 new = token_group_init();
77 if (new == NULL)
78 {
79 return NULL;
80 }
81
82 new->tokens = tgroup->tokens;
83 new->max_token = tgroup->max_token;
84 new->first = first;
85 new->last = last;
86 new->current = 0;
87
88 return(new);
89 }
90
91
92
93 /* ====================================================================
94 * NAME: token_group_destroy
95 *
96 * DESCRIPTION: Frees up all memory associated with a token_group.
97 *
98 * RETURN VALUES: None.
99 *
100 * BUGS: Because a free()d pointer still *looks* valid, it is
101 * difficult to protect against the problems that arise
102 * if the user calls this function too early.
103 * ==================================================================== */
104 void
token_group_destroy(token_group_p tgroup)105 token_group_destroy(token_group_p tgroup)
106 {
107 if (tgroup == NULL)
108 {
109 return;
110 }
111
112 if (tgroup->tokens != NULL)
113 {
114 int i;
115
116 for (i = 0; i <= tgroup->max_token; i++)
117 {
118 if (tgroup->tokens[i].type == TOKEN_TYPE_TAG_PARSED)
119 {
120 int j;
121
122 for (j = 0; j <= tgroup->tokens[i].tag_argc; j++)
123 {
124 free(tgroup->tokens[i].tag_argv[j]);
125 }
126 free(tgroup->tokens[i].tag_argv);
127
128 tgroup->tokens[i].type = TOKEN_TYPE_TAG;
129 }
130 }
131
132 free(tgroup->tokens);
133 tgroup->tokens = NULL;
134 }
135
136 free(tgroup);
137 }
138
139
140
141 /* ====================================================================
142 * NAME: token_subgroup_destroy
143 *
144 * DESCRIPTION: Frees up all memory associated with a token_group
145 * created by token_subgroup_init()
146 *
147 * RETURN VALUES: None.
148 *
149 * BUGS: Because a free()d pointer still *looks* valid, it is
150 * difficult to protect against the problems that arise
151 * if the user calls this function too early.
152 * ==================================================================== */
153 void
token_subgroup_destroy(token_group_p tgroup)154 token_subgroup_destroy(token_group_p tgroup)
155 {
156 if (tgroup == NULL)
157 {
158 return;
159 }
160
161 tgroup->tokens = NULL;
162 token_group_destroy(tgroup);
163 }
164
165
166
167 /* ====================================================================
168 * NAME: token_rewind
169 *
170 * DESCRIPTION: Reets the token group's "current" index.
171 *
172 * RETURN VALUES: None.
173 *
174 * BUGS: Hopefully none.
175 * ==================================================================== */
176 void
token_rewind(token_group_p tgroup)177 token_rewind(token_group_p tgroup)
178 {
179 if (tgroup != NULL)
180 {
181 tgroup->current = tgroup->first;
182 }
183 }
184
185
186
187 /* ====================================================================
188 * NAME: token_next
189 *
190 * DESCRIPTION: Returns the next token in a token group, and increments
191 * the token group's "current" index.
192 *
193 * RETURN VALUES: Returns NULL if there are no more tokens to return;
194 * otherwise returns a pointer to a token structure.
195 *
196 * BUGS: Hopefully none.
197 * ==================================================================== */
198 token_p
token_next(context_p ctx,token_group_p tgroup,unsigned int * position)199 token_next(context_p ctx, token_group_p tgroup, unsigned int *position)
200 {
201 token_p rtok;
202
203 if (tgroup == NULL)
204 {
205 template_errno = TMPL_ENULLARG;
206 return NULL;
207 }
208
209 if ((tgroup->max_token < 0) || (tgroup->tokens == NULL))
210 {
211 template_errno = TMPL_ENOTOKEN;
212 return NULL;
213 }
214
215 /* If current > last, we're done, so return NULL, and wrap */
216 if (tgroup->current > tgroup->last)
217 {
218 tgroup->current = tgroup->first;
219 template_errno = TMPL_ENOTOKEN;
220 return NULL;
221 }
222
223 /* Otherwise, increment and return the next token */
224 *position = tgroup->current;
225 ++(tgroup->current);
226 rtok = &(tgroup->tokens[*position]);
227
228 /* Side effect: parse tag if not already done */
229 if (rtok->type == TOKEN_TYPE_TAG)
230 {
231 token_parsetag(ctx, rtok);
232 } else if (rtok->type == TOKEN_TYPE_TAG_PARSED)
233 {
234 int j;
235
236 for (j = 1; j <= rtok->tag_argc; j++)
237 {
238 free(rtok->tag_argv[j]);
239 rtok->tag_argv[j] = NULL;
240 }
241
242 token_parsetag(ctx, rtok);
243 }
244
245 return(rtok);
246 }
247
248
249
250 /* ====================================================================
251 * NAME: token_parsetag
252 *
253 * DESCRIPTION: Parses a tag's argument list in the current context.
254 *
255 * RETURN VALUES: None.
256 *
257 * BUGS: Hopefully none.
258 * ==================================================================== */
259 void
token_parsetag(context_p ctx,token_p token)260 token_parsetag(context_p ctx, token_p token)
261 {
262 int length = 0;
263 int point = 0;
264 int total = token->length;
265 int argbegin;
266 char *tag, *p, last, instring;
267 int current_argc;
268
269 tag = token->t;
270
271 for (; (point < total) && isspace(tag[point]); point++);
272 for (p = tag + point; (point + length < total) && (!isspace(*p)); p++, length++) ;
273
274 current_argc = 0;
275 if (token->tag_argc < current_argc)
276 {
277 token->tag_argv =
278 (char **)malloc((current_argc + 1) * sizeof(char **));
279 token->tag_argc = current_argc;
280
281 /* Copy the tag name into argv[0] only if it's not already done */
282 token->tag_argv[0] = (char *)malloc(length + 1);
283 strncpy(token->tag_argv[0], tag + point, length);
284 (token->tag_argv[0])[length] = '\0';
285 }
286
287 last = '\0';
288 instring = 0;
289 argbegin = 0;
290 for (point += length + 1; point < total; point++)
291 {
292 last = *p;
293 p = tag + point;
294
295 if ((! isspace((int)*p)) && (current_argc == 0))
296 {
297 argbegin = point;
298 ++current_argc;
299 if (token->tag_argc < current_argc)
300 {
301 token->tag_argv = (char **)realloc(token->tag_argv,
302 (current_argc + 1) * (sizeof(char *)));
303 token->tag_argc = current_argc;
304 }
305 }
306 if (*p == '"')
307 {
308 if ((instring) && (last != '\\'))
309 {
310 instring = 0;
311 } else if (! instring)
312 {
313 instring = 1;
314 }
315 } else if (*p == ',')
316 {
317 if (! instring)
318 {
319 /* parse the current argument string into tag_argv */
320 token_parsearg(ctx, tag + argbegin, point - argbegin,
321 &(token->tag_argv[current_argc]));
322
323 /* point to the next argument string */
324 argbegin = point + 1;
325 ++current_argc;
326 if (token->tag_argc < current_argc)
327 {
328 token->tag_argv = (char **)realloc(token->tag_argv,
329 (current_argc + 1) * (sizeof(char *)));
330 token->tag_argc = current_argc;
331 }
332 }
333 }
334 }
335 if (current_argc > 0)
336 {
337 token_parsearg(ctx, tag + argbegin, total - argbegin,
338 &(token->tag_argv[current_argc]));
339 }
340
341 token->type = TOKEN_TYPE_TAG_PARSED;
342
343 return;
344 }
345
346
347
348 /* ====================================================================
349 * NAME: token_push
350 *
351 * DESCRIPTION: Adds a new token to a token group, extending the group's
352 * token list if necessary.
353 *
354 * RETURN VALUES: Returns 0 on failure, 1 on success.
355 *
356 * BUGS: Hopefully none.
357 * ==================================================================== */
358 int
token_push(token_group_p tgroup,char * t,unsigned long length,unsigned char type)359 token_push(token_group_p tgroup, char *t, unsigned long length,
360 unsigned char type)
361 {
362 if (tgroup == NULL)
363 {
364 template_errno = TMPL_ENULLARG;
365 return 0;
366 }
367
368 if (tgroup->max_token < 0)
369 {
370 --(tgroup->last);
371 }
372
373 if ((tgroup->max_token < 0) || (tgroup->last >= tgroup->max_token)) {
374 /* We have to allocate some new token space */
375 unsigned int i;
376
377 tgroup->max_token += TOKEN_GROWFACTOR;
378 tgroup->tokens = (token_p)realloc((void *)tgroup->tokens,
379 sizeof(token) * (tgroup->max_token + 1));
380
381 for (i = tgroup->last + 1; i <= tgroup->max_token; i++)
382 {
383 (tgroup->tokens[i]).type = TOKEN_TYPE_NONE;
384 }
385 }
386
387 ++(tgroup->last);
388
389 tgroup->tokens[tgroup->last].t = t;
390 tgroup->tokens[tgroup->last].tag_argc = -1;
391 tgroup->tokens[tgroup->last].tag_argv = NULL;
392 tgroup->tokens[tgroup->last].length = length;
393 tgroup->tokens[tgroup->last].type = type;
394
395 return(1);
396 }
397
398
399
400 /* ====================================================================
401 * NAME: tokenize
402 *
403 * DESCRIPTION: Breaks a string into a token group using the rules in
404 * the current context.
405 *
406 * RETURN VALUES: Returns 0 if the input string contains unrecoverable
407 * syntax errors, 1 otherwise.
408 *
409 * BUGS: Hopefully none.
410 * ==================================================================== */
411 int
tokenize(context_p ctx,char * input,token_group_p tokens)412 tokenize(context_p ctx, char *input, token_group_p tokens)
413 {
414 char strip = ctx_is_strip(ctx);
415 char *otag = context_get_value(ctx, TMPL_VARNAME_OTAG);
416 int slo = strlen(otag);
417 char *ctag = context_get_value(ctx, TMPL_VARNAME_CTAG);
418 int slc = strlen(ctag);
419
420 char *position = input;
421 char *tagstart, *tagend;
422
423 while ((tagstart = strstr(position, otag)) != NULL)
424 {
425 /* everything before the open tag is one token */
426 token_push(tokens, position, tagstart - position, TOKEN_TYPE_TEXT);
427
428 /* find the end of the tag */
429 tagend = strstr(tagstart, ctag);
430 if (tagend == NULL)
431 {
432 /* tokens_destroy */
433 template_errno = TMPL_EPARSE;
434 return 0;
435 }
436
437 /* the tag is one token */
438 token_push(tokens, tagstart + slo, tagend - tagstart - slo,
439 TOKEN_TYPE_TAG);
440
441 /* move past the end of the tag */
442 position = tagend + slc;
443 if ((strip) && (*position == '\n')) position++;
444 }
445
446 /* everything after the last tag is one token */
447 token_push(tokens, position, strlen(position), TOKEN_TYPE_TEXT);
448
449 return 1;
450 }
451
452
453
454 /* ====================================================================
455 * NAME: token_parsearg
456 *
457 * DESCRIPTION: Parses a string (inarg) as a single argument. Does
458 * variable substitution and string concatentation, and
459 * outputs the result into outarg.
460 *
461 * RETURN VALUES: None - output is placed into outarg.
462 *
463 * BUGS: Character by character parsing may be avoidable - not
464 * sure.
465 * ==================================================================== */
466 void
token_parsearg(context_p ctx,char * inarg,int size,char ** outarg)467 token_parsearg(context_p ctx, char *inarg, int size, char **outarg)
468 {
469 char *begin, *p, *varvalue, *b;
470 char instring, last;
471 int index, cursize, i, length;
472 context_p rootctx = NULL;
473
474 i = 0;
475 index = 0;
476 cursize = 0;
477 *outarg = NULL;
478
479 /* move past leading whitespace */
480 for (begin = inarg; isspace((int)*begin); ++begin, ++i) ;
481
482 instring = 0;
483 last = '\0';
484 for (p = begin; i < size; last = *p, p++, i++)
485 {
486 if (*p == '"')
487 {
488 if (instring)
489 {
490 if (last == '\\')
491 {
492 --index;
493 append_output(outarg, "\"", 1, &cursize, &index);
494 } else
495 {
496 instring = 0;
497 }
498 } else if (! instring)
499 {
500 instring = 1;
501 }
502 } else if (*p == '$')
503 {
504 if (instring)
505 {
506 append_output(outarg, p, 1, &cursize, &index);
507 } else
508 {
509 b = ++p;
510
511 for (++i; ((i <= size) && (isalnum((int)*p) || (*p == '_') || (*p == '.'))); p++, i++) ;
512
513 length = p - b;
514
515 if (rootctx == NULL)
516 {
517 rootctx = context_root(ctx);
518 }
519
520 if (rootctx->bufsize < (length + 1))
521 {
522 if (rootctx->buffer != NULL)
523 {
524 free(rootctx->buffer);
525 }
526 rootctx->buffer = (char *)malloc(length + 1);
527 rootctx->bufsize = length + 1;
528 }
529 strncpy(rootctx->buffer, b, length);
530 (rootctx->buffer)[length] = '\0';
531
532 varvalue = context_get_value(ctx, rootctx->buffer);
533 if (varvalue != NULL)
534 {
535 append_output(outarg, varvalue, strlen(varvalue),
536 &cursize, &index);
537 }
538 --p;
539 --i;
540 }
541 } else
542 {
543 if (instring)
544 {
545 append_output(outarg, p, 1, &cursize, &index);
546 }
547 }
548 }
549
550 /* ensure null termination even if append_output was never called */
551 if (*outarg != NULL)
552 {
553 (*outarg)[index] = '\0';
554 }
555 }
556
557
558
/* ====================================================================
 * NAME:          append_output
 *
 * DESCRIPTION:   Function used by parser to dynamically expand a string
 *                as needed.  This is really a glorified strncat which
 *                grows the destination string as needed.
 *
 *                output         - address of the destination string;
 *                                 *output may be NULL to start a new one
 *                append         - text to add (copied with strncpy(), so
 *                                 copying stops at a NUL in append)
 *                append_size    - number of characters to add
 *                current_size   - in/out: allocated size of *output
 *                current_length - in/out: length of the string in
 *                                 *output, excluding the terminator
 *
 *                When more room is needed the buffer grows to twice
 *                its current size, or to twice the required size if
 *                that is larger.
 *
 * RETURN VALUES: None, but *output, *current_size and *current_length
 *                are modified.  The call does nothing at all if an
 *                argument is invalid (NULL output, current_size or
 *                current_length; negative append_size or
 *                *current_length; NULL append with append_size > 0) or
 *                if memory cannot be allocated; the existing string is
 *                left intact in that case.
 *
 * BUGS:          Allocation failure cannot be reported to the caller
 *                because the function returns void.
 * ==================================================================== */
void
append_output(char **output, char *append, int append_size, int *current_size,
              int *current_length)
{
    int needed;

    if ((output == NULL) || (current_size == NULL)
        || (current_length == NULL))
    {
        return;
    }
    if ((append_size < 0) || (*current_length < 0)
        || ((append == NULL) && (append_size > 0)))
    {
        return;
    }

    /* room for the existing text, the new text and the terminator */
    needed = (*current_length) + append_size + 1;

    if ((needed > *current_size) || (*output == NULL))
    {
        int   new_size;
        char *grown;

        if (needed > ((*current_size) * 2))
        {
            new_size = needed * 2;
        }
        else
        {
            new_size = (*current_size) * 2;
        }

        /* Commit the new size only once the memory is really there */
        grown = (char *)realloc(*output, (size_t)new_size);
        if (grown == NULL)
        {
            return;
        }
        *output       = grown;
        *current_size = new_size;
    }

    if (append_size > 0)
    {
        strncpy((*output) + (*current_length), append, (size_t)append_size);
    }
    (*output)[(*current_length) + append_size] = '\0';

    (*current_length) += append_size;
}
601