1 /*=========================================================================
2 
3   Program:   Visualization Toolkit
4   Module:    vtkParseString.h
5 
6   Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
7   All rights reserved.
8   See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
9 
10      This software is distributed WITHOUT ANY WARRANTY; without even
11      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12      PURPOSE.  See the above copyright notice for more information.
13 
14 =========================================================================*/
15 /*-------------------------------------------------------------------------
16   Copyright (c) 2012 David Gobbi.
17 
18   Contributed to the VisualizationToolkit by the author in April 2012
19   under the terms of the Visualization Toolkit 2008 copyright.
20 -------------------------------------------------------------------------*/
21 
22 /**
23   This file provides string handling routines.
24 
  The two important jobs done by these routines are string tokenization
  and string caching.
27 
28   Tokenization is done as per the rules of a C++ preprocessor, and
29   breaks the strings into ids, literals, and operators.  Any string
30   is a valid input for the tokenizer, and it is up to the parser to
31   decide if the resulting tokens are valid within the grammar.  The
32   two primary tokenization functions are vtkParse_InitTokenizer()
33   and vtkParse_NextToken().
34 
  Caching refers to how string memory management is done.  The
  parser uses "const char *" for all strings, and expects all strings
  to be persistent and constant.  These conditions are automatically
  met by static strings, but dynamically-generated strings must be
  cached until the parse is complete.  The primary caching functions
  are vtkParse_CacheString() and vtkParse_FreeStringCache().
41 */
42 
43 #ifndef VTK_PARSE_STRING_H
44 #define VTK_PARSE_STRING_H
45 
46 #include <stddef.h>
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
/**
 * Character classes used for tokenization.
 *
 * Every basic class occupies its own bit, so several classes can be
 * OR'ed together into one mask and tested in a single call to
 * vtkParse_CharType().  CPRE_IDGIT and CPRE_WHITE are such combined
 * masks and are therefore spelled in terms of the classes they contain.
 *
 * The last enumerator is deliberately not followed by a comma: a
 * trailing comma in an enumerator list is rejected by C89 and C++98
 * compilers, and this header is meant to be usable from both languages.
 */
typedef enum _parse_char_type
{
  CPRE_ID       = 0x01,  /* A-Z a-z and _ */
  CPRE_DIGIT    = 0x02,  /* 0-9 */
  CPRE_IDGIT    = (CPRE_ID | CPRE_DIGIT),  /* 0-9 A-Z a-z and _ */
  CPRE_HEX      = 0x04,  /* 0-9A-Fa-f */
  CPRE_EXP      = 0x08,  /* EPep (exponents for floats) */
  CPRE_SIGN     = 0x10,  /* +- (sign for floats) */
  CPRE_QUOTE    = 0x20,  /* " and ' */
  CPRE_HSPACE   = 0x40,  /* space, tab, carriage return */
  CPRE_VSPACE   = 0x80,  /* newline, vertical tab, form feed */
  CPRE_WHITE    = (CPRE_HSPACE | CPRE_VSPACE)  /* all whitespace characters */
} parse_char_type;
68 
/**
 * Character type lookup table.
 *
 * Declared here only; the definition lives in another translation unit.
 * The table has one entry per possible unsigned char value, and the
 * vtkParse_CharType() macro tests an entry against a mask of
 * parse_char_type bits.
 */
extern unsigned char parse_charbits[256];
73 
/**
 * Test whether a character belongs to at least one of the given classes.
 *
 * c    - the character to classify; it is converted to unsigned char
 *        before indexing the table, so a negative plain char is safe.
 * bits - one parse_char_type value, or several OR'ed together.
 *
 * Evaluates to 1 if any of the requested bits is set for c, else 0.
 */
#define vtkParse_CharType(c, bits) \
  (0 != ((bits) & parse_charbits[(unsigned char)(c)]))
79 
80 /**
81  * Whitespace types that can be used with the tokenizer.
82  * - WS_DEFAULT treats newlines and formfeeds as regular whitespace.
83  * - WS_PREPROC treats newline as end-of-line, not as whitespace.
84  * - WS_COMMENT treats comments as tokens, not as whitespace.
85  */
86 typedef enum _parse_space_t
87 {
88   WS_DEFAULT = CPRE_WHITE,  /* skip all whitespace */
89   WS_PREPROC = CPRE_HSPACE, /* skip horizontal whitespace only */
90   WS_COMMENT = (CPRE_WHITE | 0x100), /* comments as tokens */
91 } parse_space_t;
92 
/**
 * Preprocessor tokens for C++.
 *
 * Numbering starts at 257 and increases by one per enumerator, so every
 * value here is outside the range of an unsigned char.
 * NOTE(review): presumably this lets single-character tokens be reported
 * as their own character code -- confirm against the tokenizer source.
 *
 * The last enumerator is deliberately not followed by a comma: a
 * trailing comma in an enumerator list is rejected by C89 and C++98
 * compilers.
 */
typedef enum _preproc_token_t
{
  TOK_OTHER = 257,
  TOK_ID,        /* any id */
  TOK_CHAR,      /* char literal */
  TOK_STRING,    /* string literal */
  TOK_NUMBER,    /* any numeric literal */
  TOK_COMMENT,   /* C or C++ comment */
  TOK_DBLHASH,   /* ## */
  TOK_SCOPE,     /* :: */
  TOK_INCR,      /* ++ */
  TOK_DECR,      /* -- */
  TOK_RSHIFT,    /* >> */
  TOK_LSHIFT,    /* << */
  TOK_AND,       /* && */
  TOK_OR,        /* || */
  TOK_EQ,        /* == */
  TOK_NE,        /* != */
  TOK_GE,        /* >= */
  TOK_LE,        /* <= */
  TOK_ADD_EQ,    /* += */
  TOK_SUB_EQ,    /* -= */
  TOK_MUL_EQ,    /* *= */
  TOK_DIV_EQ,    /* /= */
  TOK_MOD_EQ,    /* %= */
  TOK_AND_EQ,    /* &= */
  TOK_OR_EQ,     /* |= */
  TOK_XOR_EQ,    /* ^= */
  TOK_ARROW,     /* -> */
  TOK_DOT_STAR,  /* .* */
  TOK_ARROW_STAR,/* ->* */
  TOK_RSHIFT_EQ, /* >>= */
  TOK_LSHIFT_EQ, /* <<= */
  TOK_ELLIPSIS   /* ... */
} preproc_token_t;
131 
/**
 * A struct for going through a string one token at a time.
 *
 * Set it up with vtkParse_InitTokenizer() and advance it with
 * vtkParse_NextToken().  The "ws" member selects where tokenization
 * stops and how comments are treated:
 * - WS_PREPROC: stops when a newline or null is encountered.
 * - WS_DEFAULT: stops only when a null is encountered.
 * - WS_COMMENT: stops only when a null is encountered, and comments
 *   are returned as tokens instead of being skipped as whitespace.
 */
typedef struct _StringTokenizer
{
  int tok;           /* the current token */
  unsigned int hash; /* the hash of the current token, if it is an id */
  const char *text;  /* the text for the current token, not null-terminated */
  size_t len;        /* the length of the current token */
  parse_space_t ws;  /* controls what to consider as whitespace */
} StringTokenizer;
149 
/**
 * Initialize the tokenizer and get the first token.
 *
 * tokens - the tokenizer state to initialize.
 * text   - the string to be tokenized.
 *          NOTE(review): the token text is documented as not
 *          null-terminated, which suggests it points into this string;
 *          presumably the string must stay valid while tokenizing --
 *          confirm against the implementation.
 * wstype - WS_DEFAULT, WS_PREPROC, or WS_COMMENT.
 */
void vtkParse_InitTokenizer(
  StringTokenizer *tokens, const char *text, parse_space_t wstype);
155 
/**
 * Advance the tokenizer and return the next preprocessor token,
 * or 0 (integer zero) if there are none left.
 *
 * tokens - a tokenizer previously set up by vtkParse_InitTokenizer().
 */
int vtkParse_NextToken(StringTokenizer *tokens);
160 
/**
 * Skip over whitespace.
 * Return the number of chars until the first non-whitespace token.
 *
 * cp        - the text to scan.
 * spacetype - WS_DEFAULT, WS_PREPROC, or WS_COMMENT; selects what is
 *             considered whitespace (see parse_space_t).
 */
size_t vtkParse_SkipWhitespace(
  const char *cp, parse_space_t spacetype);
168 
/**
 * Skip over a comment, C style or C++ style.
 * Return the number of chars until the end of the comment.
 *
 * cp - the text to scan.
 *      NOTE(review): presumably expected to point at the start of the
 *      comment -- confirm against the implementation.
 */
size_t vtkParse_SkipComment(const char *cp);
174 
/**
 * Skip over a string in double or single quotes.
 * Return the number of chars until the end of the quotes.
 *
 * cp - the text to scan.
 *      NOTE(review): presumably expected to point at the opening
 *      quote -- confirm against the implementation.
 */
size_t vtkParse_SkipQuotes(const char *cp);
180 
/**
 * Skip over a number.  Uses preprocessor semantics.
 * Return the number of chars until the end of the number.
 *
 * cp - the text to scan.
 *      NOTE(review): presumably expected to point at the first
 *      character of the number -- confirm against the implementation.
 */
size_t vtkParse_SkipNumber(const char *cp);
186 
/**
 * Skip over an identifier.
 * Return the number of chars until the end of the identifier.
 *
 * cp - the text to scan.
 *      NOTE(review): presumably expected to point at the first
 *      character of the identifier -- confirm against the implementation.
 */
size_t vtkParse_SkipId(const char *cp);
192 
/**
 * Compute the hash for an id, for use in hash table lookups.
 * This stops at the first non-id character, so it is safe to use
 * on a string that is not null-terminated as long as there is either
 * whitespace or an operator character before the end of the string.
 * It can be used on null-terminated strings as well, of course.
 *
 * cp - pointer to the first character of the id.
 */
unsigned int vtkParse_HashId(const char *cp);
201 
202 
/**
 * StringCache provides a simple way of allocating strings centrally.
 * It eliminates the need to allocate and free each individual string,
 * which makes the code simpler and more efficient.
 *
 * Set up a cache with vtkParse_InitStringCache(), obtain strings from
 * it with vtkParse_NewString() or vtkParse_CacheString(), and release
 * all of them at once with vtkParse_FreeStringCache().
 *
 * NOTE(review): the member names suggest an array of memory chunks
 * plus a chunk size and a position within the current chunk; this
 * header does not show how they are used, so confirm against the
 * implementation before accessing the members directly.
 */
typedef struct _StringCache
{
  unsigned long  NumberOfChunks;
  char         **Chunks;
  size_t         ChunkSize;
  size_t         Position;
} StringCache;
215 
/**
 * Initialize the string cache.
 *
 * cache - the StringCache to initialize.
 */
void vtkParse_InitStringCache(StringCache *cache);
220 
/**
 * Allocate a new string from the cache.
 * A total of n+1 bytes will be allocated, to leave room for null.
 *
 * cache - the cache that will own the memory.
 * n     - the length of the string, not counting the terminating null.
 *
 * The memory is released by vtkParse_FreeStringCache(); there is no
 * need to free the returned string individually.
 */
char *vtkParse_NewString(StringCache *cache, size_t n);
226 
/**
 * Cache a string so that it can then be used in the vtkParse data
 * structures.  The cached string stays valid until the cache is
 * released with vtkParse_FreeStringCache().
 * At most 'n' chars will be copied, and the string will be terminated.
 * If a null pointer is provided, then a null pointer will be returned.
 *
 * cache - the cache that will own the copy.
 * cp    - the text to copy; may be a null pointer.
 * n     - the maximum number of chars to copy from cp.
 */
const char *vtkParse_CacheString(
  StringCache *cache, const char *cp, size_t n);
235 
/**
 * Free all strings that were created with vtkParse_NewString() or
 * with vtkParse_CacheString().  Pointers previously returned by those
 * functions for this cache must not be used afterwards.
 *
 * cache - the cache whose strings are to be freed.
 */
void vtkParse_FreeStringCache(StringCache *cache);
241 
242 #ifdef __cplusplus
243 } /* extern "C" */
244 #endif
245 
246 #endif
247