1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7 %                      T    O   O  K  K   E      NN  N                        %
8 %                      T    O   O  KKK    EEE    N N N                        %
9 %                      T    O   O  K  K   E      N  NN                        %
10 %                      T     OOO   K   K  EEEEE  N   N                        %
11 %                                                                             %
12 %                                                                             %
13 %                         MagickCore Token Methods                            %
14 %                                                                             %
15 %                             Software Design                                 %
16 %                                  Cristy                                     %
17 %                              January 1993                                   %
18 %                                                                             %
19 %                                                                             %
20 %  Copyright 1999-2021 ImageMagick Studio LLC, a non-profit organization      %
21 %  dedicated to making software imaging solutions freely available.           %
22 %                                                                             %
23 %  You may not use this file except in compliance with the License.  You may  %
24 %  obtain a copy of the License at                                            %
25 %                                                                             %
26 %    https://imagemagick.org/script/license.php                               %
27 %                                                                             %
28 %  Unless required by applicable law or agreed to in writing, software        %
29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31 %  See the License for the specific language governing permissions and        %
32 %  limitations under the License.                                             %
33 %                                                                             %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39 
40 /*
41   Include declarations.
42 */
43 #include "MagickCore/studio.h"
44 #include "MagickCore/exception.h"
45 #include "MagickCore/exception-private.h"
46 #include "MagickCore/image.h"
47 #include "MagickCore/image-private.h"
48 #include "MagickCore/memory_.h"
49 #include "MagickCore/memory-private.h"
50 #include "MagickCore/string_.h"
51 #include "MagickCore/string-private.h"
52 #include "MagickCore/token.h"
53 #include "MagickCore/token-private.h"
54 #include "MagickCore/utility.h"
55 #include "MagickCore/utility-private.h"
56 
57 /*
  Typedef declarations.
59 */
60 struct _TokenInfo
61 {
62   int
63     state;
64 
65   MagickStatusType
66     flag;
67 
68   ssize_t
69     offset;
70 
71   char
72     quote;
73 
74   size_t
75     signature;
76 };
77 
78 /*
79 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
80 %                                                                             %
81 %                                                                             %
82 %                                                                             %
83 %   A c q u i r e T o k e n I n f o                                           %
84 %                                                                             %
85 %                                                                             %
86 %                                                                             %
87 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
88 %
89 %  AcquireTokenInfo() allocates the TokenInfo structure.
90 %
91 %  The format of the AcquireTokenInfo method is:
92 %
93 %      TokenInfo *AcquireTokenInfo()
94 %
95 */
AcquireTokenInfo(void)96 MagickExport TokenInfo *AcquireTokenInfo(void)
97 {
98   TokenInfo
99     *token_info;
100 
101   token_info=(TokenInfo *) AcquireCriticalMemory(sizeof(*token_info));
102   token_info->signature=MagickCoreSignature;
103   return(token_info);
104 }
105 
106 /*
107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108 %                                                                             %
109 %                                                                             %
110 %                                                                             %
111 %   D e s t r o y T o k e n I n f o                                           %
112 %                                                                             %
113 %                                                                             %
114 %                                                                             %
115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
116 %
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
%  structure.
119 %
120 %  The format of the DestroyTokenInfo method is:
121 %
122 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
123 %
124 %  A description of each parameter follows:
125 %
%    o token_info: Specifies a pointer to a TokenInfo structure.
127 %
128 */
DestroyTokenInfo(TokenInfo * token_info)129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
130 {
131   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
132   assert(token_info != (TokenInfo *) NULL);
133   assert(token_info->signature == MagickCoreSignature);
134   token_info->signature=(~MagickCoreSignature);
135   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
136   return(token_info);
137 }
138 
139 /*
140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
141 %                                                                             %
142 %                                                                             %
143 %                                                                             %
144 +   G e t N e x t T o k e n                                                   %
145 %                                                                             %
146 %                                                                             %
147 %                                                                             %
148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149 %
%  GetNextToken() gets a token from the token stream.  A token is defined as
%  a sequence of characters delimited by whitespace (e.g. clip-path), a
%  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
%  parentheses (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these
%  separator characters: ':', '=', ',', and ';'.  GetNextToken() returns one
%  more than the number of characters consumed from start, including any
%  whitespace skipped before and after the token.
156 %
157 %  The format of the GetNextToken method is:
158 %
159 %      size_t GetNextToken(const char *magick_restrict start,
160 %        const char **magick_restrict end,const size_t extent,
161 %        char *magick_restrict token)
162 %
163 %  A description of each parameter follows:
164 %
165 %    o start: the start of the token sequence.
166 %
167 %    o end: point to the end of the token sequence.
168 %
169 %    o extent: maximum extent of the token.
170 %
171 %    o token: copy the token to this buffer.
172 %
173 */
GetNextToken(const char * magick_restrict start,const char ** magick_restrict end,const size_t extent,char * magick_restrict token)174 MagickExport magick_hot_spot size_t GetNextToken(
175   const char *magick_restrict start,const char **magick_restrict end,
176   const size_t extent,char *magick_restrict token)
177 {
178   double
179     value;
180 
181   char
182     *magick_restrict q;
183 
184   const char
185     *magick_restrict p;
186 
187   ssize_t
188     i;
189 
190   assert(start != (const char *) NULL);
191   assert(token != (char *) NULL);
192   i=0;
193   p=start;
194   while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
195     p++;
196   switch (*p)
197   {
198     case '\0':
199       break;
200     case '"':
201     case '\'':
202     case '`':
203     case '{':
204     {
205       char
206         escape;
207 
208       switch (*p)
209       {
210         case '"': escape='"'; break;
211         case '\'': escape='\''; break;
212         case '`': escape='\''; break;
213         case '{': escape='}'; break;
214         default: escape=(*p); break;
215       }
216       for (p++; *p != '\0'; p++)
217       {
218         if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
219           p++;
220         else
221           if (*p == escape)
222             {
223               p++;
224               break;
225             }
226         if (i < (ssize_t) (extent-1))
227           token[i++]=(*p);
228         if ((size_t) (p-start) >= (extent-1))
229           break;
230       }
231       break;
232     }
233     case '/':
234     {
235       if (i < (ssize_t) (extent-1))
236         token[i++]=(*p);
237       p++;
238       if ((*p == '>') || (*p == '/'))
239         {
240           if (i < (ssize_t) (extent-1))
241             token[i++]=(*p);
242           p++;
243         }
244       break;
245     }
246     default:
247     {
248       value=StringToDouble(p,&q);
249       (void) value;
250       if ((p != q) && (*p != ','))
251         {
252           for ( ; (p < q) && (*p != ','); p++)
253           {
254             if (i < (ssize_t) (extent-1))
255               token[i++]=(*p);
256             if ((size_t) (p-start) >= (extent-1))
257               break;
258           }
259           if (*p == '%')
260             {
261               if (i < (ssize_t) (extent-1))
262                 token[i++]=(*p);
263               p++;
264             }
265           break;
266         }
267       if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
268           (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
269         {
270           if (i < (ssize_t) (extent-1))
271             token[i++]=(*p);
272           p++;
273           break;
274         }
275       for ( ; *p != '\0'; p++)
276       {
277         if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
278             (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
279           break;
280         if ((i > 0) && (*p == '<'))
281           break;
282         if (i < (ssize_t) (extent-1))
283           token[i++]=(*p);
284         if (*p == '>')
285           break;
286         if (*p == '(')
287           {
288             for (p++; *p != '\0'; p++)
289             {
290               if (i < (ssize_t) (extent-1))
291                 token[i++]=(*p);
292               if ((*p == ')') && (*(p-1) != '\\'))
293                 break;
294               if ((size_t) (p-start) >= (extent-1))
295                 break;
296             }
297             if (*p == '\0')
298               break;
299           }
300         if ((size_t) (p-start) >= (extent-1))
301           break;
302       }
303       break;
304     }
305   }
306   token[i]='\0';
307   if (LocaleNCompare(token,"url(#",5) == 0)
308     {
309       q=strrchr(token,')');
310       if (q != (char *) NULL)
311         {
312           *q='\0';
313           (void) memmove(token,token+5,(size_t) (q-token-4));
314         }
315     }
316   while (isspace((int) ((unsigned char) *p)) != 0)
317     p++;
318   if (end != (const char **) NULL)
319     *end=(const char *) p;
320   return(p-start+1);
321 }
322 
323 /*
324 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
325 %                                                                             %
326 %                                                                             %
327 %                                                                             %
328 %   G l o b E x p r e s s i o n                                               %
329 %                                                                             %
330 %                                                                             %
331 %                                                                             %
332 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
333 %
334 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
335 %
336 %  The format of the GlobExpression function is:
337 %
338 %      MagickBooleanType GlobExpression(const char *magick_restrict expression,
339 %        const char *magick_restrict pattern,
340 %        const MagickBooleanType case_insensitive)
341 %
342 %  A description of each parameter follows:
343 %
344 %    o expression: Specifies a pointer to a text string containing a file name.
345 %
346 %    o pattern: Specifies a pointer to a text string containing a pattern.
347 %
348 %    o case_insensitive: set to MagickTrue to ignore the case when matching
349 %      an expression.
350 %
351 */
GlobExpression(const char * magick_restrict expression,const char * magick_restrict pattern,const MagickBooleanType case_insensitive)352 MagickExport MagickBooleanType GlobExpression(
353   const char *magick_restrict expression,const char *magick_restrict pattern,
354   const MagickBooleanType case_insensitive)
355 {
356   MagickBooleanType
357     done,
358     match;
359 
360   /*
361     Return on empty pattern or '*'.
362   */
363   if (pattern == (char *) NULL)
364     return(MagickTrue);
365   if (GetUTFCode(pattern) == 0)
366     return(MagickTrue);
367   if (LocaleCompare(pattern,"*") == 0)
368     return(MagickTrue);
369   if ((GetUTFCode(pattern+strlen(pattern)-1) == ']') &&
370       (strchr(pattern,'[') != (char *) NULL))
371     {
372       ExceptionInfo
373         *exception;
374 
375       ImageInfo
376         *image_info;
377 
378       /*
379         Determine if pattern is a scene, i.e. img0001.pcd[2].
380       */
381       image_info=AcquireImageInfo();
382       (void) CopyMagickString(image_info->filename,pattern,MagickPathExtent);
383       exception=AcquireExceptionInfo();
384       (void) SetImageInfo(image_info,0,exception);
385       exception=DestroyExceptionInfo(exception);
386       if (LocaleCompare(image_info->filename,pattern) != 0)
387         {
388           image_info=DestroyImageInfo(image_info);
389           return(MagickFalse);
390         }
391       image_info=DestroyImageInfo(image_info);
392     }
393   /*
394     Evaluate glob expression.
395   */
396   done=MagickFalse;
397   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
398   {
399     if (GetUTFCode(expression) == 0)
400       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
401         break;
402     switch (GetUTFCode(pattern))
403     {
404       case '*':
405       {
406         MagickBooleanType
407           status;
408 
409         status=MagickFalse;
410         while (GetUTFCode(pattern) == '*')
411           pattern+=GetUTFOctets(pattern);
412         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
413         {
414           status=GlobExpression(expression,pattern,case_insensitive);
415           expression+=GetUTFOctets(expression);
416         }
417         if (status != MagickFalse)
418           {
419             while (GetUTFCode(expression) != 0)
420               expression+=GetUTFOctets(expression);
421             while (GetUTFCode(pattern) != 0)
422               pattern+=GetUTFOctets(pattern);
423           }
424         break;
425       }
426       case '[':
427       {
428         int
429           c;
430 
431         pattern+=GetUTFOctets(pattern);
432         for ( ; ; )
433         {
434           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
435             {
436               done=MagickTrue;
437               break;
438             }
439           if (GetUTFCode(pattern) == '\\')
440             {
441               pattern+=GetUTFOctets(pattern);
442               if (GetUTFCode(pattern) == 0)
443                 {
444                   done=MagickTrue;
445                   break;
446                 }
447              }
448           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
449             {
450               c=GetUTFCode(pattern);
451               pattern+=GetUTFOctets(pattern);
452               pattern+=GetUTFOctets(pattern);
453               if (GetUTFCode(pattern) == ']')
454                 {
455                   done=MagickTrue;
456                   break;
457                 }
458               if (GetUTFCode(pattern) == '\\')
459                 {
460                   pattern+=GetUTFOctets(pattern);
461                   if (GetUTFCode(pattern) == 0)
462                     {
463                       done=MagickTrue;
464                       break;
465                     }
466                 }
467               if ((GetUTFCode(expression) < c) ||
468                   (GetUTFCode(expression) > GetUTFCode(pattern)))
469                 {
470                   pattern+=GetUTFOctets(pattern);
471                   continue;
472                 }
473             }
474           else
475             if (GetUTFCode(pattern) != GetUTFCode(expression))
476               {
477                 pattern+=GetUTFOctets(pattern);
478                 continue;
479               }
480           pattern+=GetUTFOctets(pattern);
481           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
482           {
483             if ((GetUTFCode(pattern) == '\\') &&
484                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
485               pattern+=GetUTFOctets(pattern);
486             pattern+=GetUTFOctets(pattern);
487           }
488           if (GetUTFCode(pattern) != 0)
489             {
490               pattern+=GetUTFOctets(pattern);
491               expression+=GetUTFOctets(expression);
492             }
493           break;
494         }
495         break;
496       }
497       case '?':
498       {
499         pattern+=GetUTFOctets(pattern);
500         expression+=GetUTFOctets(expression);
501         break;
502       }
503       case '{':
504       {
505         char
506           *target;
507 
508         char
509           *p;
510 
511         target=AcquireString(pattern);
512         p=target;
513         pattern++;
514         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
515         {
516           *p++=(*pattern++);
517           if ((GetUTFCode(pattern) == ',') || (GetUTFCode(pattern) == '}'))
518             {
519               *p='\0';
520               match=GlobExpression(expression,target,case_insensitive);
521               if (match != MagickFalse)
522                 {
523                   expression+=MagickMin(strlen(expression),strlen(target));
524                   break;
525                 }
526               p=target;
527               pattern+=GetUTFOctets(pattern);
528             }
529         }
530         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
531           pattern+=GetUTFOctets(pattern);
532         if (GetUTFCode(pattern) != 0)
533           pattern+=GetUTFOctets(pattern);
534         target=DestroyString(target);
535         break;
536       }
537       case '\\':
538       {
539         pattern+=GetUTFOctets(pattern);
540         if (GetUTFCode(pattern) == 0)
541           break;
542       }
543       default:
544       {
545         if (case_insensitive != MagickFalse)
546           {
547             if (LocaleLowercase((int) GetUTFCode(expression)) != LocaleLowercase((int) GetUTFCode(pattern)))
548               {
549                 done=MagickTrue;
550                 break;
551               }
552           }
553         else
554           if (GetUTFCode(expression) != GetUTFCode(pattern))
555             {
556               done=MagickTrue;
557               break;
558             }
559         expression+=GetUTFOctets(expression);
560         pattern+=GetUTFOctets(pattern);
561       }
562     }
563   }
564   while (GetUTFCode(pattern) == '*')
565     pattern+=GetUTFOctets(pattern);
566   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
567     MagickTrue : MagickFalse;
568   return(match);
569 }
570 
571 /*
572 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
573 %                                                                             %
574 %                                                                             %
575 %                                                                             %
576 +     I s G l o b                                                             %
577 %                                                                             %
578 %                                                                             %
579 %                                                                             %
580 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
581 %
582 %  IsGlob() returns MagickTrue if the path specification contains a globbing
583 %  pattern.
584 %
585 %  The format of the IsGlob method is:
586 %
%      MagickBooleanType IsGlob(const char *path)
588 %
589 %  A description of each parameter follows:
590 %
591 %    o path: the path.
592 %
593 */
IsGlob(const char * path)594 MagickPrivate MagickBooleanType IsGlob(const char *path)
595 {
596   MagickBooleanType
597     status = MagickFalse;
598 
599   const char
600     *p;
601 
602   if (IsPathAccessible(path) != MagickFalse)
603     return(MagickFalse);
604   for (p=path; *p != '\0'; p++)
605   {
606     switch (*p)
607     {
608       case '*':
609       case '?':
610       case '{':
611       case '}':
612       case '[':
613       case ']':
614       {
615         status=MagickTrue;
616         break;
617       }
618       default:
619         break;
620     }
621   }
622   return(status);
623 }
624 
625 /*
626 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
627 %                                                                             %
628 %                                                                             %
629 %                                                                             %
630 %   T o k e n i z e r                                                         %
631 %                                                                             %
632 %                                                                             %
633 %                                                                             %
634 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
635 %
636 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
637 %  one at a time from a string of characters.  The characters used for white
638 %  space, for break characters, and for quotes can be specified.  Also,
639 %  characters in the string can be preceded by a specifiable escape character
640 %  which removes any special meaning the character may have.
641 %
642 %  Here is some terminology:
643 %
644 %    o token: A single unit of information in the form of a group of
645 %      characters.
646 %
%    o white space: Space that gets ignored (except within quotes or when
%      escaped), like blanks and tabs.  In addition, white space terminates a
%      non-quoted token.
650 %
651 %    o break set: One or more characters that separates non-quoted tokens.
652 %      Commas are a common break character. The usage of break characters to
653 %      signal the end of a token is the same as that of white space, except
654 %      multiple break characters with nothing or only white space between
655 %      generate a null token for each two break characters together.
656 %
657 %      For example, if blank is set to be the white space and comma is set to
658 %      be the break character, the line
659 %
660 %        A, B, C ,  , DEF
661 %
662 %        ... consists of 5 tokens:
663 %
664 %        1)  "A"
665 %        2)  "B"
666 %        3)  "C"
667 %        4)  "" (the null string)
668 %        5)  "DEF"
669 %
670 %    o Quote character: A character that, when surrounding a group of other
671 %      characters, causes the group of characters to be treated as a single
672 %      token, no matter how many white spaces or break characters exist in
673 %      the group. Also, a token always terminates after the closing quote.
674 %      For example, if ' is the quote character, blank is white space, and
675 %      comma is the break character, the following string
676 %
677 %        A, ' B, CD'EF GHI
678 %
679 %        ... consists of 4 tokens:
680 %
681 %        1)  "A"
682 %        2)  " B, CD" (note the blanks & comma)
683 %        3)  "EF"
684 %        4)  "GHI"
685 %
%      The quote characters themselves do not appear in the resultant
%      tokens.  The double quotes are delimiters I use here for
%      documentation purposes only.
689 %
690 %    o Escape character: A character which itself is ignored but which
691 %      causes the next character to be used as is.  ^ and \ are often used
692 %      as escape characters. An escape in the last position of the string
693 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
694 %      and non-escape) character. For example, assume white space, break
695 %      character, and quote are the same as in the above examples, and
696 %      further, assume that ^ is the escape character. Then, in the string
697 %
698 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
699 %
700 %        ... there are 7 tokens:
701 %
702 %        1)  "ABC"
703 %        2)  " DEF ' GH"
704 %        3)  "I"
705 %        4)  " "     (a lone blank)
706 %        5)  "J"
707 %        6)  "K L"
708 %        7)  "^"     (passed as is at end of line)
709 %
710 %  The format of the Tokenizer method is:
711 %
712 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
713 %        const size_t max_token_length,const char *line,const char *white,
714 %        const char *break_set,const char *quote,const char escape,
715 %        char *breaker,int *next,char *quoted)
716 %
717 %  A description of each parameter follows:
718 %
%    o flag: right now, only the low order 2 bits are used.
720 %
721 %        1 => convert non-quoted tokens to upper case
722 %        2 => convert non-quoted tokens to lower case
723 %        0 => do not convert non-quoted tokens
724 %
725 %    o token: a character string containing the returned next token
726 %
727 %    o max_token_length: the maximum size of "token".  Characters beyond
728 %      "max_token_length" are truncated.
729 %
%    o line: the string to be parsed.
731 %
732 %    o white: a string of the valid white spaces.  example:
733 %
734 %        char whitesp[]={" \t"};
735 %
736 %      blank and tab will be valid white space.
737 %
%    o break_set: a string of the valid break characters. example:
739 %
740 %        char breakch[]={";,"};
741 %
742 %      semicolon and comma will be valid break characters.
743 %
744 %    o quote: a string of the valid quote characters. An example would be
745 %
%        char quotech[]={"'\""};
747 %
748 %      (this causes single and double quotes to be valid) Note that a
749 %      token starting with one of these characters needs the same quote
750 %      character to terminate it.
751 %
752 %      for example:
753 %
754 %        "ABC '
755 %
756 %      is unterminated, but
757 %
758 %        "DEF" and 'GHI'
759 %
760 %      are properly terminated.  Note that different quote characters
761 %      can appear on the same line; only for a given token do the quote
762 %      characters have to be the same.
763 %
764 %    o escape: the escape character (NOT a string ... only one
765 %      allowed). Use zero if none is desired.
766 %
767 %    o breaker: the break character used to terminate the current
768 %      token.  If the token was quoted, this will be the quote used.  If
769 %      the token is the last one on the line, this will be zero.
770 %
771 %    o next: this variable points to the first character of the
772 %      next token.  it gets reset by "tokenizer" as it steps through the
773 %      string.  Set it to 0 upon initialization, and leave it alone
774 %      after that.  You can change it if you want to jump around in the
775 %      string or re-parse from the beginning, but be careful.
776 %
%    o quoted: set to MagickTrue if the token was quoted and MagickFalse
778 %      if not.  You may need this information (for example:  in C, a
779 %      string with quotes around it is a character string, while one
780 %      without is an identifier).
781 %
782 %    o result: 0 if we haven't reached EOS (end of string), and 1
783 %      if we have.
784 %
785 */
786 
/*
  Tokenizer() parser states.
*/
enum
{
  IN_WHITE = 0,  /* before a token: skipping white space */
  IN_TOKEN = 1,  /* inside an unquoted token */
  IN_QUOTE = 2,  /* inside a quoted token */
  IN_OZONE = 3   /* token ended; waiting for a break or the next token */
};
791 
/*
  Return the zero-based position of the first character in string equal to
  c, or -1 when there is none.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    i;

  for (i=0; string[i] != '\0'; i++)
    if ((int) string[i] == c)
      return(i);
  return(-1);
}
802 
/*
  Append character c to the token held in string, advancing
  token_info->offset.  Nothing is stored when the offset is negative or the
  token already holds max_token_length-1 characters.  Outside of quotes the
  character is converted to upper case (flag 1) or lower case (flag 2).
*/
static void StoreToken(TokenInfo *token_info,char *string,
  size_t max_token_length,int c)
{
  char
    octet;

  ssize_t
    slot;

  if (token_info->offset < 0)
    return;
  if ((size_t) token_info->offset >= (max_token_length-1))
    return;
  slot=token_info->offset;
  token_info->offset++;
  octet=(char) c;
  if (token_info->state != IN_QUOTE)
    {
      /*
        Only the two low bits of the flag select a conversion.
      */
      if ((token_info->flag & 0x03) == 1)
        octet=(char) LocaleUppercase(c);
      else
        if ((token_info->flag & 0x03) == 2)
          octet=(char) LocaleLowercase(c);
    }
  string[slot]=octet;
}
832 
Tokenizer(TokenInfo * token_info,const unsigned flag,char * token,const size_t max_token_length,const char * line,const char * white,const char * break_set,const char * quote,const char escape,char * breaker,int * next,char * quoted)833 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
834   char *token,const size_t max_token_length,const char *line,const char *white,
835   const char *break_set,const char *quote,const char escape,char *breaker,
836   int *next,char *quoted)
837 {
838   int
839     c;
840 
841   ssize_t
842     i;
843 
844   *breaker='\0';
845   *quoted='\0';
846   if (line[*next] == '\0')
847     return(1);
848   token_info->state=IN_WHITE;
849   token_info->quote=(char) MagickFalse;
850   token_info->flag=flag;
851   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
852   {
853     c=(int) line[*next];
854     i=sindex(c,break_set);
855     if (i >= 0)
856       {
857         switch (token_info->state)
858         {
859           case IN_WHITE:
860           case IN_TOKEN:
861           case IN_OZONE:
862           {
863             (*next)++;
864             *breaker=break_set[i];
865             token[token_info->offset]='\0';
866             return(0);
867           }
868           case IN_QUOTE:
869           {
870             StoreToken(token_info,token,max_token_length,c);
871             break;
872           }
873         }
874         continue;
875       }
876     i=sindex(c,quote);
877     if (i >= 0)
878       {
879         switch (token_info->state)
880         {
881           case IN_WHITE:
882           {
883             token_info->state=IN_QUOTE;
884             token_info->quote=quote[i];
885             *quoted=(char) MagickTrue;
886             break;
887           }
888           case IN_QUOTE:
889           {
890             if (quote[i] != token_info->quote)
891               StoreToken(token_info,token,max_token_length,c);
892             else
893               {
894                 token_info->state=IN_OZONE;
895                 token_info->quote='\0';
896               }
897             break;
898           }
899           case IN_TOKEN:
900           case IN_OZONE:
901           {
902             *breaker=(char) c;
903             token[token_info->offset]='\0';
904             return(0);
905           }
906         }
907         continue;
908       }
909     i=sindex(c,white);
910     if (i >= 0)
911       {
912         switch (token_info->state)
913         {
914           case IN_WHITE:
915           case IN_OZONE:
916             break;
917           case IN_TOKEN:
918           {
919             token_info->state=IN_OZONE;
920             break;
921           }
922           case IN_QUOTE:
923           {
924             StoreToken(token_info,token,max_token_length,c);
925             break;
926           }
927         }
928         continue;
929       }
930     if (c == (int) escape)
931       {
932         if (line[(*next)+1] == '\0')
933           {
934             *breaker='\0';
935             StoreToken(token_info,token,max_token_length,c);
936             (*next)++;
937             token[token_info->offset]='\0';
938             return(0);
939           }
940         switch (token_info->state)
941         {
942           case IN_WHITE:
943           {
944             (*next)--;
945             token_info->state=IN_TOKEN;
946             break;
947           }
948           case IN_TOKEN:
949           case IN_QUOTE:
950           {
951             (*next)++;
952             c=(int) line[*next];
953             StoreToken(token_info,token,max_token_length,c);
954             break;
955           }
956           case IN_OZONE:
957           {
958             token[token_info->offset]='\0';
959             return(0);
960           }
961         }
962         continue;
963       }
964     switch (token_info->state)
965     {
966       case IN_WHITE:
967       {
968         token_info->state=IN_TOKEN;
969         StoreToken(token_info,token,max_token_length,c);
970         break;
971       }
972       case IN_TOKEN:
973       case IN_QUOTE:
974       {
975         StoreToken(token_info,token,max_token_length,c);
976         break;
977       }
978       case IN_OZONE:
979       {
980         token[token_info->offset]='\0';
981         return(0);
982       }
983     }
984   }
985   token[token_info->offset]='\0';
986   return(0);
987 }
988