1 /*=========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * =========================================================================== 24 * 25 */ 26 27 #ifndef _h_klib_token_ 28 #define _h_klib_token_ 29 30 #ifndef _h_klib_extern_ 31 #include <klib/extern.h> 32 #endif 33 34 #ifndef _h_klib_text_ 35 #include <klib/text.h> 36 #endif 37 38 39 #ifdef __cplusplus 40 extern "C" { 41 #endif 42 43 44 /*-------------------------------------------------------------------------- 45 * forwards 46 */ 47 struct KSymbol; 48 49 50 /*-------------------------------------------------------------------------- 51 * KTokenText 52 * named text 53 * 54 * it's likely that the name will need to become a refcounted object... 55 */ 56 typedef struct KTokenText KTokenText; 57 struct KTokenText 58 { 59 String str; 60 String path; 61 62 rc_t ( CC * read ) ( void *self, KTokenText *tt, size_t save ); 63 void *data; 64 }; 65 66 67 /* Init 68 * initialize structure 69 */ 70 #define KTokenTextInit( tt, text, fpath ) \ 71 ( void ) ( ( tt ) -> str = * ( text ), \ 72 ( tt ) -> path = * ( fpath ), \ 73 ( tt ) -> read = NULL, \ 74 ( tt ) -> data = NULL ) 75 #ifndef KTokenTextInit 76 KLIB_EXTERN void CC KTokenTextInit ( KTokenText *tt, const String *text, const String *path ); 77 #endif 78 79 #define KTokenTextInitCString( tt, text, fpath ) \ 80 ( void ) ( StringInitCString ( & ( tt ) -> str, text ), \ 81 StringInitCString ( & ( tt ) -> path, fpath ), \ 82 ( tt ) -> read = NULL, \ 83 ( tt ) -> data = NULL ) 84 #ifndef KTokenTextInitCString 85 KLIB_EXTERN void CC KTokenTextInitCString ( KTokenText *tt, const char *text, const char *path ); 86 #endif 87 88 /*-------------------------------------------------------------------------- 89 * KTokenID 90 * pre-defined ids for text tokens 91 * not every tokenizer will return all tokens 92 */ 93 enum KTokenID 94 { 95 eEndOfInput, eUnrecognized, eUntermComment, 96 97 /* space separators */ 98 eWhiteSpace, eEndOfLine, 99 100 /* numeric */ 101 eDecimal, eHex, eOctal, eFloat, eExpFloat, eMajMinRel, 102 103 /* string */ 104 eUntermString, eString, eUntermEscapedString, eEscapedString, 105 106 /* names */ 107 eIdent, eName, 108 109 /* punctuation */ 110 ePeriod, eComma, eColon, eSemiColon, eQuestion, eTilde, eExclam, 111 eAtSign, eHash, eDollar, ePercent, eCaret, eAmpersand, eAsterisk, 112 ePlus, eMinus, eAssign, eFwdSlash, eBackSlash, ePipe, 113 114 /* paired punctuation */ 115 eLeftParen, eRightParen, 116 eLeftCurly, eRightCurly, 117 eLeftAngle, eRightAngle, 118 eLeftSquare, eRightSquare, 119 120 /* compound tokens */ 121 eDblPeriod, eDblColon, eEllipsis, eLogAnd, eLogOr, 122 eEqual, eNotEqual, eColonAssign, ePlusAssign, eOverArrow, 123 124 eDblLeftAngle, eDblRightAngle, 125 eDblLeftSquare, eDblRightSquare, 126 127 /* first free id */ 128 eNumTokenIDs, 129 130 /* namespace type - needed by KSymTable */ 131 eNamespace = eNumTokenIDs, 132 eNumSymtabIDs 133 }; 134 135 136 /*-------------------------------------------------------------------------- 137 * KToken 138 * a string with an id and source information 139 */ 140 typedef struct KToken KToken; 141 struct KToken 142 { 143 const KTokenText *txt; 144 struct KSymbol *sym; 145 String str; 146 uint32_t id; 147 uint32_t lineno; 148 }; 149 150 151 /* conversion operators 152 * since the constants have a type id, the entire token is used 153 */ 154 KLIB_EXTERN rc_t CC KTokenToI32 ( const KToken *self, int32_t *i ); 155 KLIB_EXTERN rc_t CC KTokenToU32 ( const KToken *self, uint32_t *i ); 156 KLIB_EXTERN rc_t CC KTokenToI64 ( const KToken *self, int64_t *i ); 157 KLIB_EXTERN rc_t CC KTokenToU64 ( const KToken *self, uint64_t *i ); 158 KLIB_EXTERN rc_t CC KTokenToF64 ( const KToken *self, double *d ); 159 KLIB_EXTERN rc_t CC KTokenToVersion ( const KToken *self, uint32_t *v ); 160 KLIB_EXTERN rc_t CC KTokenToString ( const KToken *self, char *buffer, size_t bsize, size_t *size ); 161 KLIB_EXTERN rc_t CC KTokenToWideString ( const KToken *self, uint32_t *buffer, uint32_t blen, uint32_t *len ); 162 163 164 /*-------------------------------------------------------------------------- 165 * KTokenSource 166 * a modifiable source of tokens 167 */ 168 typedef struct KTokenSource KTokenSource; 169 struct KTokenSource 170 { 171 const KTokenText *txt; 172 String str; 173 uint32_t lineno; 174 }; 175 176 /* Init 177 */ 178 #define KTokenSourceInit( self, tt ) \ 179 ( void ) ( ( self ) -> str = ( tt ) -> str, \ 180 ( self ) -> txt = ( tt ), \ 181 ( self ) -> lineno = 1 ) 182 #ifndef KTokenSourceInit 183 KLIB_EXTERN void CC KTokenSourceInit ( KTokenSource *self, const KTokenText *txt ); 184 #endif 185 186 /* Return 187 * returns token to source 188 */ 189 KLIB_EXTERN void CC KTokenSourceReturn ( KTokenSource *self, const KToken *t ); 190 191 192 /* Consume 193 * consumes all remaining data 194 */ 195 KLIB_EXTERN void CC KTokenSourceConsume ( KTokenSource *self ); 196 197 198 /* Avail 199 * the number of characters available 200 */ 201 #define KTokenSourceAvail( self ) \ 202 ( ( ( const KTokenSource* ) ( self ) ) -> str . len ) 203 #ifndef KTokenSourceAvail 204 KLIB_EXTERN uint32_t CC KTokenSourceAvail ( const KTokenSource *self ); 205 #endif 206 207 /*-------------------------------------------------------------------------- 208 * KTokenizer 209 * an encapsulation of the tokenizer code 210 */ 211 typedef struct KTokenizer KTokenizer; 212 213 214 /* Next 215 * scan for next token 216 * 217 * "src" [ IN ] - source of token text 218 * 219 * "t" [ OUT ] - scanned token 220 * 221 * returns a pointer to "t" for convenience 222 */ 223 KLIB_EXTERN KToken* CC KTokenizerNext ( const KTokenizer *self, KTokenSource *src, KToken *t ); 224 225 226 /* kDefaultTokenizer 227 * a constant KTokenizer* to obtain default behavior 228 */ 229 #define kDefaultTokenizer ( ( const KTokenizer* ) 0 ) 230 231 232 /* kLineTokenizer 233 * behaves like default tokenizer 234 * except that eEndOfLine tokens are returned 235 */ 236 #define kLineTokenizer ( ( const KTokenizer* ) 1 ) 237 238 239 /* kPOSIXPathTokenizer 240 * tokenizes a POSIX path string 241 */ 242 #define kPOSIXPathTokenizer ( ( const KTokenizer* ) 2 ) 243 244 /* kKfgTokenizer 245 * tokenizes a KFG config file 246 * with its name value pairs and comments 247 */ 248 #define kKfgTokenizer ( ( const KTokenizer* ) 3 ) 249 250 #ifdef __cplusplus 251 } 252 #endif 253 254 #endif /* _h_klib_token_ */ 255