1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #ifndef _h_klib_token_
28 #define _h_klib_token_
29 
30 #ifndef _h_klib_extern_
31 #include <klib/extern.h>
32 #endif
33 
34 #ifndef _h_klib_text_
35 #include <klib/text.h>
36 #endif
37 
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
43 
/*--------------------------------------------------------------------------
 * forwards
 *  KSymbol is declared here as an incomplete type only; within this
 *  header it is referenced solely through a pointer ( KToken :: sym ),
 *  so its definition is not needed.
 */
struct KSymbol;
48 
49 
50 /*--------------------------------------------------------------------------
51  * KTokenText
52  *  named text
53  *
54  *  it's likely that the name will need to become a refcounted object...
55  */
56 typedef struct KTokenText KTokenText;
57 struct KTokenText
58 {
59     String str;
60     String path;
61 
62     rc_t ( CC * read ) ( void *self, KTokenText *tt, size_t save );
63     void *data;
64 };
65 
66 
/* Init
 *  initialize structure from two existing String objects
 *
 *  "tt" [ OUT ] - KTokenText to initialize
 *
 *  "text" [ IN ] - String whose value is copied into tt -> str
 *
 *  "fpath" [ IN ] - String whose value is copied into tt -> path
 *
 *  the "read" and "data" members are set to NULL.
 *
 *  this is a macro: "tt" is expanded four times, so pass an argument
 *  without side effects. the macro parameter is named "fpath" rather
 *  than "path"; a parameter named "path" would also be substituted
 *  into the "-> path" member access.
 *
 *  the prototype is compiled only when the macro is not defined and
 *  otherwise just records the intended signature.
 */
#define KTokenTextInit( tt, text, fpath )          \
    ( ( void )                                     \
      ( ( ( tt ) -> str  = * ( text ) ),           \
        ( ( tt ) -> path = * ( fpath ) ),          \
        ( ( tt ) -> read = NULL ),                 \
        ( ( tt ) -> data = NULL ) ) )

#ifndef KTokenTextInit
KLIB_EXTERN void CC KTokenTextInit ( KTokenText *tt,
    const String *text, const String *path );
#endif
78 
/* InitCString
 *  initialize structure from two C strings
 *
 *  "tt" [ OUT ] - KTokenText to initialize
 *
 *  "text" [ IN ] - passed to StringInitCString to fill tt -> str
 *
 *  "fpath" [ IN ] - passed to StringInitCString to fill tt -> path
 *
 *  the "read" and "data" members are set to NULL.
 *
 *  StringInitCString is not defined in this header
 *  ( presumably supplied by <klib/text.h> - TODO confirm ).
 *
 *  this is a macro: "tt" is expanded four times, so pass an argument
 *  without side effects. the prototype is compiled only when the
 *  macro is not defined and otherwise just records the signature.
 */
#define KTokenTextInitCString( tt, text, fpath )              \
    ( ( void )                                                \
      ( StringInitCString ( & ( tt ) -> str, text ),          \
        StringInitCString ( & ( tt ) -> path, fpath ),        \
        ( ( tt ) -> read = NULL ),                            \
        ( ( tt ) -> data = NULL ) ) )

#ifndef KTokenTextInitCString
KLIB_EXTERN void CC KTokenTextInitCString ( KTokenText *tt,
    const char *text, const char *path );
#endif
87 
/*--------------------------------------------------------------------------
 * KTokenID
 *  pre-defined ids for text tokens
 *  not every tokenizer will return all tokens
 *
 *  ids are numbered consecutively from zero in declaration order,
 *  so inserting or reordering enumerators changes their values
 */
enum KTokenID
{
    /* general */
    eEndOfInput = 0,
    eUnrecognized,
    eUntermComment,

    /* space separators */
    eWhiteSpace,
    eEndOfLine,

    /* numeric */
    eDecimal,
    eHex,
    eOctal,
    eFloat,
    eExpFloat,
    eMajMinRel,

    /* string */
    eUntermString,
    eString,
    eUntermEscapedString,
    eEscapedString,

    /* names */
    eIdent,
    eName,

    /* punctuation */
    ePeriod,
    eComma,
    eColon,
    eSemiColon,
    eQuestion,
    eTilde,
    eExclam,
    eAtSign,
    eHash,
    eDollar,
    ePercent,
    eCaret,
    eAmpersand,
    eAsterisk,
    ePlus,
    eMinus,
    eAssign,
    eFwdSlash,
    eBackSlash,
    ePipe,

    /* paired punctuation */
    eLeftParen,
    eRightParen,
    eLeftCurly,
    eRightCurly,
    eLeftAngle,
    eRightAngle,
    eLeftSquare,
    eRightSquare,

    /* compound tokens */
    eDblPeriod,
    eDblColon,
    eEllipsis,
    eLogAnd,
    eLogOr,
    eEqual,
    eNotEqual,
    eColonAssign,
    ePlusAssign,
    eOverArrow,
    eDblLeftAngle,
    eDblRightAngle,
    eDblLeftSquare,
    eDblRightSquare,

    /* first free id - also the count of token ids above */
    eNumTokenIDs,

    /* namespace type - needed by KSymTable
       shares its value with eNumTokenIDs */
    eNamespace = eNumTokenIDs,

    /* one past eNamespace */
    eNumSymtabIDs
};
134 
135 
136 /*--------------------------------------------------------------------------
137  * KToken
138  *  a string with an id and source information
139  */
140 typedef struct KToken KToken;
141 struct KToken
142 {
143     const KTokenText *txt;
144     struct KSymbol *sym;
145     String str;
146     uint32_t id;
147     uint32_t lineno;
148 };
149 
150 
/* conversion operators
 *  since the constants have a type id, the entire token is used
 *
 *  "self" [ IN ] - token to convert; taken as a pointer to const
 *
 *  "i", "d", "v" [ OUT ] - caller-supplied location for the converted
 *  value ( int32_t, uint32_t, int64_t, uint64_t, double, or a
 *  uint32_t version, depending on the function )
 *
 *  ToString:
 *   "buffer" [ OUT ] and "bsize" [ IN ] - char buffer and its capacity
 *   "size" [ OUT ] - presumably the size of the produced text
 *
 *  ToWideString:
 *   "buffer" [ OUT ] and "blen" [ IN ] - buffer of uint32_t elements
 *   and its capacity
 *   "len" [ OUT ] - presumably the number of elements produced
 *
 *  every function returns an rc_t.
 *  NOTE(review): which token ids each conversion accepts, and the
 *  rc_t reported on failure or on a too-small buffer, cannot be told
 *  from this header - confirm against the implementation.
 */
KLIB_EXTERN rc_t CC KTokenToI32 ( const KToken *self, int32_t *i );
KLIB_EXTERN rc_t CC KTokenToU32 ( const KToken *self, uint32_t *i );
KLIB_EXTERN rc_t CC KTokenToI64 ( const KToken *self, int64_t *i );
KLIB_EXTERN rc_t CC KTokenToU64 ( const KToken *self, uint64_t *i );
KLIB_EXTERN rc_t CC KTokenToF64 ( const KToken *self, double *d );
KLIB_EXTERN rc_t CC KTokenToVersion ( const KToken *self, uint32_t *v );
KLIB_EXTERN rc_t CC KTokenToString ( const KToken *self, char *buffer, size_t bsize, size_t *size );
KLIB_EXTERN rc_t CC KTokenToWideString ( const KToken *self, uint32_t *buffer, uint32_t blen, uint32_t *len );
162 
163 
164 /*--------------------------------------------------------------------------
165  * KTokenSource
166  *  a modifiable source of tokens
167  */
168 typedef struct KTokenSource KTokenSource;
169 struct KTokenSource
170 {
171     const KTokenText *txt;
172     String str;
173     uint32_t lineno;
174 };
175 
/* Init
 *  prepare a token source over a KTokenText
 *
 *  "self" [ OUT ] - KTokenSource to initialize
 *
 *  "tt" [ IN ] - text to read from; its "str" is copied by value
 *  and the pointer itself is stored in self -> txt
 *
 *  line numbering starts at 1.
 *
 *  this is a macro: "self" is expanded three times and "tt" twice,
 *  so pass arguments without side effects. the prototype is compiled
 *  only when the macro is not defined and otherwise just records the
 *  intended signature.
 */
#define KTokenSourceInit( self, tt )               \
    ( ( void )                                     \
      ( ( ( self ) -> str    = ( tt ) -> str ),    \
        ( ( self ) -> txt    = ( tt ) ),           \
        ( ( self ) -> lineno = 1 ) ) )

#ifndef KTokenSourceInit
KLIB_EXTERN void CC KTokenSourceInit ( KTokenSource *self,
    const KTokenText *txt );
#endif
185 
/* Return
 *  returns token to source
 *
 *  "self" [ IN, OUT ] - source receiving the token; not const
 *
 *  "t" [ IN ] - token being returned; taken as a pointer to const
 *
 *  NOTE(review): presumably "t" must be the token most recently
 *  scanned from "self" - the header does not say; confirm against
 *  the implementation.
 */
KLIB_EXTERN void CC KTokenSourceReturn ( KTokenSource *self, const KToken *t );
190 
191 
/* Consume
 *  consumes all remaining data
 *
 *  "self" [ IN, OUT ] - source to consume; not const
 *
 *  NOTE(review): presumably leaves KTokenSourceAvail ( self ) at
 *  zero - confirm against the implementation.
 */
KLIB_EXTERN void CC KTokenSourceConsume ( KTokenSource *self );
196 
197 
/* Avail
 *  the number of characters available
 *
 *  "self" [ IN ] - source to query
 *
 *  evaluates to the "len" member of the source's "str".
 *
 *  this is a macro that casts its argument to "const KTokenSource*",
 *  so the compiler does not check the argument's pointer type. the
 *  prototype is compiled only when the macro is not defined and
 *  otherwise just records the intended signature.
 */
#define KTokenSourceAvail( self ) \
    ( ( * ( const KTokenSource* ) ( self ) ) . str . len )

#ifndef KTokenSourceAvail
KLIB_EXTERN uint32_t CC KTokenSourceAvail ( const KTokenSource *self );
#endif
206 
/*--------------------------------------------------------------------------
 * KTokenizer
 *  an encapsulation of the tokenizer code
 *
 *  the type is left incomplete in this header: no struct definition
 *  is given, and the only KTokenizer values provided here are the
 *  constant selectors ( kDefaultTokenizer etc. ) defined below.
 */
typedef struct KTokenizer KTokenizer;
212 
213 
/* Next
 *  scan for next token
 *
 *  "self" [ IN ] - tokenizer to use; this header supplies the
 *  constants kDefaultTokenizer, kLineTokenizer, kPOSIXPathTokenizer
 *  and kKfgTokenizer for this parameter
 *
 *  "src" [ IN ] - source of token text
 *  ( not const; presumably advanced past the scanned token - TODO confirm )
 *
 *  "t" [ OUT ] - scanned token
 *
 *  returns a pointer to "t" for convenience
 */
KLIB_EXTERN KToken* CC KTokenizerNext ( const KTokenizer *self, KTokenSource *src, KToken *t );
224 
225 
/* kDefaultTokenizer
 *  a constant KTokenizer* to obtain default behavior
 *
 *  this and the following tokenizer constants are small integers
 *  cast to "const KTokenizer*"; they are selector values, not
 *  addresses of KTokenizer objects, and must not be dereferenced.
 *  the value here is 0, i.e. a null pointer.
 */
#define kDefaultTokenizer ( ( const KTokenizer* ) 0 )


/* kLineTokenizer
 *  behaves like default tokenizer
 *  except that eEndOfLine tokens are returned
 */
#define kLineTokenizer ( ( const KTokenizer* ) 1 )


/* kPOSIXPathTokenizer
 *  tokenizes a POSIX path string
 */
#define kPOSIXPathTokenizer ( ( const KTokenizer* ) 2 )

/* kKfgTokenizer
 *  tokenizes a KFG config file
 *  with its name value pairs and comments
 */
#define kKfgTokenizer  ( ( const KTokenizer* ) 3 )
249 
250 #ifdef __cplusplus
251 }
252 #endif
253 
254 #endif /* _h_klib_token_ */
255