1 /* ====================================================================
2  * Copyright 1999 J. David Lowe. All rights reserved.
3  *
4  * tokens.c
5  *
6  * Functions for manipulating a token list.
7  *
8  * ==================================================================== */
9 
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <ctype.h>
14 
15 #include <template.h>
16 
17 
18 
19 /* ====================================================================
20  * NAME:          token_group_init
21  *
22  * DESCRIPTION:   Initializes and returns a pointer to a new token_group
23  *                structure.
24  *
25  * RETURN VALUES: Returns NULL if the memory allocation fails; otherwise
26  *                returns a pointer to a token_group structure.
27  *
28  * BUGS:          Hopefully none.
29  * ==================================================================== */
30 token_group_p
token_group_init(void)31 token_group_init(void)
32 {
33     token_group_p tgroup;
34 
35     tgroup = (token_group_p)malloc(sizeof(token_group));
36     if (tgroup == NULL)
37     {
38         template_errno = TMPL_EMALLOC;
39         return NULL;
40     }
41 
42     tgroup->tokens      = NULL;
43     tgroup->max_token   = -1;
44     tgroup->first       = 0;
45     tgroup->last        = 0;
46     tgroup->current     = 0;
47 
48     return(tgroup);
49 }
50 
51 
52 
53 /* ====================================================================
54  * NAME:          token_subgroup_init
55  *
56  * DESCRIPTION:   Initializes and returns a pointer to a new token_group
57  *                structure, "inherited" from an existing token_group.
58  *
59  * RETURN VALUES: Returns NULL if the memory allocation fails; otherwise
60  *                returns a pointer to a token_group structure.
61  *
62  * BUGS:          Hopefully none.
63  * ==================================================================== */
64 token_group_p
token_subgroup_init(token_group_p tgroup,unsigned int first,unsigned int last)65 token_subgroup_init(token_group_p tgroup, unsigned int first,
66                     unsigned int last)
67 {
68     token_group_p new;
69 
70     if (tgroup == NULL)
71     {
72         template_errno = TMPL_ENULLARG;
73         return NULL;
74     }
75 
76     new = token_group_init();
77     if (new == NULL)
78     {
79          return NULL;
80     }
81 
82     new->tokens      = tgroup->tokens;
83     new->max_token   = tgroup->max_token;
84     new->first       = first;
85     new->last        = last;
86     new->current     = 0;
87 
88     return(new);
89 }
90 
91 
92 
93 /* ====================================================================
94  * NAME:          token_group_destroy
95  *
96  * DESCRIPTION:   Frees up all memory associated with a token_group.
97  *
98  * RETURN VALUES: None.
99  *
100  * BUGS:          Because a free()d pointer still *looks* valid, it is
101  *                difficult to protect against the problems that arise
102  *                if the user calls this function too early.
103  * ==================================================================== */
104 void
token_group_destroy(token_group_p tgroup)105 token_group_destroy(token_group_p tgroup)
106 {
107     if (tgroup == NULL)
108     {
109         return;
110     }
111 
112     if (tgroup->tokens != NULL)
113     {
114         int i;
115 
116         for (i = 0; i <= tgroup->max_token; i++)
117         {
118             if (tgroup->tokens[i].type == TOKEN_TYPE_TAG_PARSED)
119             {
120                 int j;
121 
122                 for (j = 0; j <= tgroup->tokens[i].tag_argc; j++)
123                 {
124                     free(tgroup->tokens[i].tag_argv[j]);
125                 }
126                 free(tgroup->tokens[i].tag_argv);
127 
128                 tgroup->tokens[i].type = TOKEN_TYPE_TAG;
129             }
130         }
131 
132         free(tgroup->tokens);
133         tgroup->tokens = NULL;
134     }
135 
136     free(tgroup);
137 }
138 
139 
140 
141 /* ====================================================================
142  * NAME:          token_subgroup_destroy
143  *
144  * DESCRIPTION:   Frees up all memory associated with a token_group
145  *                created by token_subgroup_init()
146  *
147  * RETURN VALUES: None.
148  *
149  * BUGS:          Because a free()d pointer still *looks* valid, it is
150  *                difficult to protect against the problems that arise
151  *                if the user calls this function too early.
152  * ==================================================================== */
153 void
token_subgroup_destroy(token_group_p tgroup)154 token_subgroup_destroy(token_group_p tgroup)
155 {
156     if (tgroup == NULL)
157     {
158         return;
159     }
160 
161     tgroup->tokens = NULL;
162     token_group_destroy(tgroup);
163 }
164 
165 
166 
167 /* ====================================================================
168  * NAME:          token_rewind
169  *
170  * DESCRIPTION:   Reets the token group's "current" index.
171  *
172  * RETURN VALUES: None.
173  *
174  * BUGS:          Hopefully none.
175  * ==================================================================== */
176 void
token_rewind(token_group_p tgroup)177 token_rewind(token_group_p tgroup)
178 {
179     if (tgroup != NULL)
180     {
181         tgroup->current = tgroup->first;
182     }
183 }
184 
185 
186 
187 /* ====================================================================
188  * NAME:          token_next
189  *
190  * DESCRIPTION:   Returns the next token in a token group, and increments
191  *                the token group's "current" index.
192  *
193  * RETURN VALUES: Returns NULL if there are no more tokens to return;
194  *                otherwise returns a pointer to a token structure.
195  *
196  * BUGS:          Hopefully none.
197  * ==================================================================== */
198 token_p
token_next(context_p ctx,token_group_p tgroup,unsigned int * position)199 token_next(context_p ctx, token_group_p tgroup, unsigned int *position)
200 {
201     token_p rtok;
202 
203     if (tgroup == NULL)
204     {
205         template_errno = TMPL_ENULLARG;
206         return NULL;
207     }
208 
209     if ((tgroup->max_token < 0) || (tgroup->tokens == NULL))
210     {
211         template_errno = TMPL_ENOTOKEN;
212         return NULL;
213     }
214 
215     /* If current > last, we're done, so return NULL, and wrap */
216     if (tgroup->current > tgroup->last)
217     {
218         tgroup->current = tgroup->first;
219         template_errno = TMPL_ENOTOKEN;
220         return NULL;
221     }
222 
223     /* Otherwise, increment and return the next token */
224     *position = tgroup->current;
225     ++(tgroup->current);
226     rtok = &(tgroup->tokens[*position]);
227 
228     /* Side effect: parse tag if not already done */
229     if (rtok->type == TOKEN_TYPE_TAG)
230     {
231         token_parsetag(ctx, rtok);
232     } else if (rtok->type == TOKEN_TYPE_TAG_PARSED)
233     {
234         int j;
235 
236         for (j = 1; j <= rtok->tag_argc; j++)
237         {
238             free(rtok->tag_argv[j]);
239             rtok->tag_argv[j] = NULL;
240         }
241 
242         token_parsetag(ctx, rtok);
243     }
244 
245     return(rtok);
246 }
247 
248 
249 
250 /* ====================================================================
251  * NAME:          token_parsetag
252  *
253  * DESCRIPTION:   Parses a tag's argument list in the current context.
254  *
255  * RETURN VALUES: None.
256  *
257  * BUGS:          Hopefully none.
258  * ==================================================================== */
259 void
token_parsetag(context_p ctx,token_p token)260 token_parsetag(context_p ctx, token_p token)
261 {
262     int length = 0;
263     int point  = 0;
264     int total  = token->length;
265     int argbegin;
266     char *tag, *p, last, instring;
267     int current_argc;
268 
269     tag = token->t;
270 
271     for (; (point < total) && isspace(tag[point]); point++);
272     for (p = tag + point; (point + length < total) && (!isspace(*p)); p++, length++) ;
273 
274     current_argc = 0;
275     if (token->tag_argc < current_argc)
276     {
277         token->tag_argv =
278                       (char **)malloc((current_argc + 1) * sizeof(char **));
279         token->tag_argc = current_argc;
280 
281         /* Copy the tag name into argv[0] only if it's not already done */
282         token->tag_argv[0] = (char *)malloc(length + 1);
283         strncpy(token->tag_argv[0], tag + point, length);
284         (token->tag_argv[0])[length] = '\0';
285     }
286 
287     last      = '\0';
288     instring  = 0;
289     argbegin  = 0;
290     for (point += length + 1; point < total; point++)
291     {
292         last = *p;
293         p = tag + point;
294 
295         if ((! isspace((int)*p)) && (current_argc == 0))
296         {
297             argbegin = point;
298             ++current_argc;
299             if (token->tag_argc < current_argc)
300             {
301                 token->tag_argv = (char **)realloc(token->tag_argv,
302                                         (current_argc + 1) * (sizeof(char *)));
303                 token->tag_argc = current_argc;
304             }
305         }
306         if (*p == '"')
307         {
308             if ((instring) && (last != '\\'))
309             {
310                 instring = 0;
311             } else if (! instring)
312             {
313                 instring = 1;
314             }
315         } else if (*p == ',')
316         {
317             if (! instring)
318             {
319                 /* parse the current argument string into tag_argv */
320                 token_parsearg(ctx, tag + argbegin, point - argbegin,
321                                &(token->tag_argv[current_argc]));
322 
323                 /* point to the next argument string */
324                 argbegin = point + 1;
325                 ++current_argc;
326                 if (token->tag_argc < current_argc)
327                 {
328                     token->tag_argv = (char **)realloc(token->tag_argv,
329                                         (current_argc + 1) * (sizeof(char *)));
330                     token->tag_argc = current_argc;
331                 }
332             }
333         }
334     }
335     if (current_argc > 0)
336     {
337         token_parsearg(ctx, tag + argbegin, total - argbegin,
338                        &(token->tag_argv[current_argc]));
339     }
340 
341     token->type = TOKEN_TYPE_TAG_PARSED;
342 
343     return;
344 }
345 
346 
347 
348 /* ====================================================================
349  * NAME:          token_push
350  *
351  * DESCRIPTION:   Adds a new token to a token group, extending the group's
352  *                token list if necessary.
353  *
354  * RETURN VALUES: Returns 0 on failure, 1 on success.
355  *
356  * BUGS:          Hopefully none.
357  * ==================================================================== */
358 int
token_push(token_group_p tgroup,char * t,unsigned long length,unsigned char type)359 token_push(token_group_p tgroup, char *t, unsigned long length,
360            unsigned char type)
361 {
362     if (tgroup == NULL)
363     {
364         template_errno = TMPL_ENULLARG;
365         return 0;
366     }
367 
368     if (tgroup->max_token < 0)
369     {
370         --(tgroup->last);
371     }
372 
373     if ((tgroup->max_token < 0) || (tgroup->last >= tgroup->max_token)) {
374         /* We have to allocate some new token space */
375         unsigned int i;
376 
377         tgroup->max_token += TOKEN_GROWFACTOR;
378         tgroup->tokens = (token_p)realloc((void *)tgroup->tokens,
379                                       sizeof(token) * (tgroup->max_token + 1));
380 
381         for (i = tgroup->last + 1; i <= tgroup->max_token; i++)
382         {
383             (tgroup->tokens[i]).type = TOKEN_TYPE_NONE;
384         }
385     }
386 
387     ++(tgroup->last);
388 
389     tgroup->tokens[tgroup->last].t        = t;
390     tgroup->tokens[tgroup->last].tag_argc = -1;
391     tgroup->tokens[tgroup->last].tag_argv = NULL;
392     tgroup->tokens[tgroup->last].length   = length;
393     tgroup->tokens[tgroup->last].type     = type;
394 
395     return(1);
396 }
397 
398 
399 
400 /* ====================================================================
401  * NAME:          tokenize
402  *
403  * DESCRIPTION:   Breaks a string into a token group using the rules in
404  *                the current context.
405  *
406  * RETURN VALUES: Returns 0 if the input string contains unrecoverable
407  *                syntax errors, 1 otherwise.
408  *
409  * BUGS:          Hopefully none.
410  * ==================================================================== */
411 int
tokenize(context_p ctx,char * input,token_group_p tokens)412 tokenize(context_p ctx, char *input, token_group_p tokens)
413 {
414     char strip = ctx_is_strip(ctx);
415     char *otag = context_get_value(ctx, TMPL_VARNAME_OTAG);
416     int  slo   = strlen(otag);
417     char *ctag = context_get_value(ctx, TMPL_VARNAME_CTAG);
418     int  slc   = strlen(ctag);
419 
420     char *position = input;
421     char *tagstart, *tagend;
422 
423     while ((tagstart = strstr(position, otag)) != NULL)
424     {
425         /* everything before the open tag is one token */
426         token_push(tokens, position, tagstart - position, TOKEN_TYPE_TEXT);
427 
428         /* find the end of the tag */
429         tagend = strstr(tagstart, ctag);
430         if (tagend == NULL)
431         {
432             /* tokens_destroy */
433             template_errno = TMPL_EPARSE;
434             return 0;
435         }
436 
437         /* the tag is one token */
438         token_push(tokens, tagstart + slo, tagend - tagstart - slo,
439                    TOKEN_TYPE_TAG);
440 
441         /* move past the end of the tag */
442         position = tagend + slc;
443         if ((strip) && (*position == '\n')) position++;
444     }
445 
446     /* everything after the last tag is one token */
447     token_push(tokens, position, strlen(position), TOKEN_TYPE_TEXT);
448 
449     return 1;
450 }
451 
452 
453 
454 /* ====================================================================
455  * NAME:          token_parsearg
456  *
457  * DESCRIPTION:   Parses a string (inarg) as a single argument.  Does
458  *                variable substitution and string concatentation, and
459  *                outputs the result into outarg.
460  *
461  * RETURN VALUES: None - output is placed into outarg.
462  *
463  * BUGS:          Character by character parsing may be avoidable - not
464  *                sure.
465  * ==================================================================== */
466 void
token_parsearg(context_p ctx,char * inarg,int size,char ** outarg)467 token_parsearg(context_p ctx, char *inarg, int size, char **outarg)
468 {
469     char *begin, *p, *varvalue, *b;
470     char instring, last;
471     int  index, cursize, i, length;
472     context_p rootctx = NULL;
473 
474     i       = 0;
475     index   = 0;
476     cursize = 0;
477     *outarg = NULL;
478 
479     /* move past leading whitespace */
480     for (begin = inarg; isspace((int)*begin); ++begin, ++i) ;
481 
482     instring = 0;
483     last     = '\0';
484     for (p = begin; i < size; last = *p, p++, i++)
485     {
486         if (*p == '"')
487         {
488             if (instring)
489             {
490                 if (last == '\\')
491                 {
492                     --index;
493                     append_output(outarg, "\"", 1, &cursize, &index);
494                 } else
495                 {
496                     instring = 0;
497                 }
498             } else if (! instring)
499             {
500                 instring = 1;
501             }
502         } else if (*p == '$')
503         {
504             if (instring)
505             {
506                 append_output(outarg, p, 1, &cursize, &index);
507             } else
508             {
509                 b = ++p;
510 
511                 for (++i; ((i <= size) && (isalnum((int)*p) || (*p == '_') || (*p == '.'))); p++, i++) ;
512 
513                 length = p - b;
514 
515                 if (rootctx == NULL)
516                 {
517                     rootctx = context_root(ctx);
518                 }
519 
520                 if (rootctx->bufsize < (length + 1))
521                 {
522                     if (rootctx->buffer != NULL)
523                     {
524                         free(rootctx->buffer);
525                     }
526                     rootctx->buffer  = (char *)malloc(length + 1);
527                     rootctx->bufsize = length + 1;
528                 }
529                 strncpy(rootctx->buffer, b, length);
530                 (rootctx->buffer)[length] = '\0';
531 
532                 varvalue = context_get_value(ctx, rootctx->buffer);
533                 if (varvalue != NULL)
534                 {
535                     append_output(outarg, varvalue, strlen(varvalue),
536                                   &cursize, &index);
537                 }
538                 --p;
539                 --i;
540             }
541         } else
542         {
543             if (instring)
544             {
545                 append_output(outarg, p, 1, &cursize, &index);
546             }
547         }
548     }
549 
550     /* ensure null termination even if append_output was never called */
551     if (*outarg != NULL)
552     {
553         (*outarg)[index] = '\0';
554     }
555 }
556 
557 
558 
/* ====================================================================
 * NAME:          append_output
 *
 * DESCRIPTION:   Function used by parser to dynamically expand a string
 *                as needed.  This is really a glorified strncat which
 *                grows the destination string as needed.
 *
 *                  output          address of the destination string;
 *                                  *output may be NULL for an empty
 *                                  string and may be moved by this call
 *                  append          text to add
 *                  append_size     number of characters of append to add
 *                  current_size    allocated size of *output in bytes
 *                  current_length  length of the string in *output
 *
 *                When the buffer is too small it is grown to twice its
 *                size, or to twice the required size if doubling is
 *                not enough.  The result is always NUL-terminated.
 *
 * RETURN VALUES: None, but *output, *current_size and *current_length
 *                are modified.  If an argument is NULL, append_size is
 *                negative, or the buffer cannot be grown, nothing is
 *                changed and the text is not appended.
 *
 * BUGS:          Allocation failure cannot be reported to the caller
 *                other than through whatever the allocator itself
 *                records (e.g. errno on POSIX systems).
 * ==================================================================== */
void
append_output(char **output, char *append, int append_size, int *current_size,
              int *current_length)
{
    int needed;

    if ((output == NULL) || (append == NULL) || (current_size == NULL)
        || (current_length == NULL) || (append_size < 0))
    {
        return;
    }

    /* room for the existing text, the new text and the terminator */
    needed = (*current_length) + append_size + 1;

    if (needed > *current_size)
    {
        char *grown;
        int   new_size;

        if (needed > ((*current_size) * 2))
        {
            new_size = needed * 2;
        } else
        {
            new_size = (*current_size) * 2;
        }

        /* realloc(NULL, n) allocates, so an empty *output needs no
           special case; on failure the old buffer stays valid */
        grown = (char *)realloc(*output, (size_t)new_size);
        if (grown == NULL)
        {
            return;
        }

        *output       = grown;
        *current_size = new_size;
    }

    strncpy((*output) + (*current_length), append, append_size);
    (*output)[(*current_length) + append_size] = '\0';

    (*current_length) += append_size;
}
601