/*=========================================================================

  Program:   Visualization Toolkit
  Module:    vtkParseString.h

  Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
  All rights reserved.
  See Copyright.txt or http://www.kitware.com/Copyright.htm for details.

     This software is distributed WITHOUT ANY WARRANTY; without even
     the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
     PURPOSE.  See the above copyright notice for more information.

=========================================================================*/
/*-------------------------------------------------------------------------
  Copyright (c) 2012 David Gobbi.

  Contributed to the VisualizationToolkit by the author in April 2012
  under the terms of the Visualization Toolkit 2008 copyright.
-------------------------------------------------------------------------*/

/**
  This file provides string handling routines.

  The two important jobs done by these routines are string tokenization
  and string caching.

  Tokenization is done as per the rules of a C++ preprocessor, and
  breaks the strings into ids, literals, and operators.  Any string
  is a valid input for the tokenizer, and it is up to the parser to
  decide if the resulting tokens are valid within the grammar.  The
  two primary tokenization functions are vtkParse_InitTokenizer()
  and vtkParse_NextToken().

  Caching refers to how string memory management is done.  The
  parser uses "const char *" for all strings, and expects all strings
  to be persistent and constant.  These conditions are automatically
  met by static strings, but dynamically-generated strings must be
  cached until the parse is complete.  The primary caching functions
  are vtkParse_CacheString() and vtkParse_FreeStringCache().
*/

#ifndef VTK_PARSE_STRING_H
#define VTK_PARSE_STRING_H

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Various important char types for tokenization.
 *
 * Each value is a bit mask; CPRE_IDGIT and CPRE_WHITE are unions of
 * the masks that precede them (CPRE_ID | CPRE_DIGIT and
 * CPRE_HSPACE | CPRE_VSPACE respectively).
 *
 * The last enumerator deliberately has no trailing comma: C90 and
 * C++98/03 do not allow one, and this header is also read by C++ code.
 */
typedef enum _parse_char_type
{
  CPRE_ID     = 0x01, /* A-Z a-z and _ */
  CPRE_DIGIT  = 0x02, /* 0-9 */
  CPRE_IDGIT  = 0x03, /* 0-9 A-Z a-z and _ */
  CPRE_HEX    = 0x04, /* 0-9A-Fa-f */
  CPRE_EXP    = 0x08, /* EPep (exponents for floats) */
  CPRE_SIGN   = 0x10, /* +- (sign for floats) */
  CPRE_QUOTE  = 0x20, /* " and ' */
  CPRE_HSPACE = 0x40, /* space, tab, carriage return */
  CPRE_VSPACE = 0x80, /* newline, vertical tab, form feed */
  CPRE_WHITE  = 0xC0  /* all whitespace characters */
} parse_char_type;

/**
 * Character type lookup table, indexed by unsigned char value.
 * Each entry is a combination of parse_char_type bits.
 * This is only a declaration; the table is defined elsewhere.
 */
extern unsigned char parse_charbits[256];

/**
 * Macro to check if a char is of a certain type.
 *
 * 'c' is converted to unsigned char before indexing, so a plain char
 * with a negative value cannot index outside the table.  'bits' is a
 * mask of parse_char_type values; the result is nonzero (1) if any of
 * those bits is set for 'c', and 0 otherwise.
 */
#define vtkParse_CharType(c, bits) \
  ((parse_charbits[(unsigned char)(c)] & (bits)) != 0)

/**
 * Whitespace types that can be used with the tokenizer.
 * - WS_DEFAULT treats newlines and formfeeds as regular whitespace.
 * - WS_PREPROC treats newline as end-of-line, not as whitespace.
 * - WS_COMMENT treats comments as tokens, not as whitespace.
 *
 * WS_COMMENT adds the flag 0x100, which lies above the eight
 * parse_char_type bits, to the CPRE_WHITE mask.
 */
typedef enum _parse_space_t
{
  WS_DEFAULT = CPRE_WHITE,           /* skip all whitespace */
  WS_PREPROC = CPRE_HSPACE,          /* skip horizontal whitespace only */
  WS_COMMENT = (CPRE_WHITE | 0x100)  /* comments as tokens */
} parse_space_t;

/**
 * Preprocessor tokens for C++.
 *
 * Values start at 257, i.e. above the range of an unsigned char.
 * NOTE(review): presumably single-character tokens are reported as
 * their own character code -- confirm against the implementation.
 */
typedef enum _preproc_token_t
{
  TOK_OTHER = 257,
  TOK_ID,        /* any id */
  TOK_CHAR,      /* char literal */
  TOK_STRING,    /* string literal */
  TOK_NUMBER,    /* any numeric literal */
  TOK_COMMENT,   /* C or C++ comment */
  TOK_DBLHASH,   /* ## */
  TOK_SCOPE,     /* :: */
  TOK_INCR,      /* ++ */
  TOK_DECR,      /* -- */
  TOK_RSHIFT,    /* >> */
  TOK_LSHIFT,    /* << */
  TOK_AND,       /* && */
  TOK_OR,        /* || */
  TOK_EQ,        /* == */
  TOK_NE,        /* != */
  TOK_GE,        /* >= */
  TOK_LE,        /* <= */
  TOK_ADD_EQ,    /* += */
  TOK_SUB_EQ,    /* -= */
  TOK_MUL_EQ,    /* *= */
  TOK_DIV_EQ,    /* /= */
  TOK_MOD_EQ,    /* %= */
  TOK_AND_EQ,    /* &= */
  TOK_OR_EQ,     /* |= */
  TOK_XOR_EQ,    /* ^= */
  TOK_ARROW,     /* -> */
  TOK_DOT_STAR,  /* .* */
  TOK_ARROW_STAR,/* ->* */
  TOK_RSHIFT_EQ, /* >>= */
  TOK_LSHIFT_EQ, /* <<= */
  TOK_ELLIPSIS   /* ... */
} preproc_token_t;

/**
 * A struct for going through a string one token at a time.
 * If ws is set to WS_PREPROC, then tokenization stops when a
 * newline or null is encountered.  If ws is set to WS_DEFAULT,
 * then tokenization only stops when a null is encountered.  If
 * ws is set to WS_COMMENT, then tokenization stops only when
 * a null is encountered, and comments are returned as tokens
 * instead of being skipped as whitespace.
 */
typedef struct _StringTokenizer
{
  int tok;           /* the current token */
  unsigned int hash; /* the hash of the current token, if it is an id */
  const char *text;  /* the text for the current token, not null-terminated */
  size_t len;        /* the length of the current token */
  parse_space_t ws;  /* controls what to consider as whitespace */
} StringTokenizer;

/**
 * Initialize the tokenizer and get the first token.
 * The whitespace handling is selected by wstype (see parse_space_t).
 */
void vtkParse_InitTokenizer(
  StringTokenizer *tokens, const char *text, parse_space_t wstype);

/**
 * Return the next preprocessor token, or 0 if none left.
 * NOTE(review): the original text said '0'; this is read here as the
 * integer zero rather than the character constant -- confirm against
 * the implementation.
 */
int vtkParse_NextToken(StringTokenizer *tokens);

/**
 * Skip over whitespace.
 * Return the number of chars until the first non-whitespace token.
 * Set spacetype to WS_DEFAULT, WS_PREPROC, or WS_COMMENT.
 */
size_t vtkParse_SkipWhitespace(
  const char *cp, parse_space_t spacetype);

/**
 * Skip over a comment, C style or C++ style.
 * Return the number of chars until the end of the comment.
 */
size_t vtkParse_SkipComment(const char *cp);

/**
 * Skip over a string in double or single quotes.
 * Return the number of chars until the end of the quotes.
 */
size_t vtkParse_SkipQuotes(const char *cp);

/**
 * Skip over a number.  Uses preprocessor semantics.
 * Return the number of chars until the end of the number.
 */
size_t vtkParse_SkipNumber(const char *cp);

/**
 * Skip over an identifier.
 * Return the number of chars until the end of the identifier.
 */
size_t vtkParse_SkipId(const char *cp);

/**
 * Compute the hash for an id, for use in hash table lookups.
 * This stops at the first non-Id character, so it is safe to use
 * on a string that is not null-terminated as long as there is either
 * whitespace or an operator character before the end of the string.
 * It can be used on null-terminated strings as well, of course.
 */
unsigned int vtkParse_HashId(const char *cp);


/**
 * StringCache provides a simple way of allocating strings centrally.
 * It eliminates the need to allocate and free each individual string,
 * which makes the code simpler and more efficient.
 *
 * NOTE(review): the members are not documented in this header.
 * Presumably Chunks holds NumberOfChunks allocated buffers and
 * Position is the fill offset within the current one -- confirm
 * against the implementation before relying on this.
 */
typedef struct _StringCache
{
  unsigned long NumberOfChunks;
  char **Chunks;
  size_t ChunkSize;
  size_t Position;
} StringCache;

/**
 * Initialize the string cache.
 */
void vtkParse_InitStringCache(StringCache *cache);

/**
 * Allocate a new string from the cache.
 * A total of n+1 bytes will be allocated, to leave room for null.
 */
char *vtkParse_NewString(StringCache *cache, size_t n);

/**
 * Cache a string so that it can then be used in the vtkParse data
 * structures.  The string will last until the application exits
 * (or until the cache is released with vtkParse_FreeStringCache()).
 * At most 'n' chars will be copied, and the string will be terminated.
 * If a null pointer is provided, then a null pointer will be returned.
 */
const char *vtkParse_CacheString(
  StringCache *cache, const char *cp, size_t n);

/**
 * Free all strings that were created with vtkParse_NewString() or
 * with vtkParse_CacheString().
 */
void vtkParse_FreeStringCache(StringCache *cache);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif